blob: 028fc9597ffc8480f3a257ec9d41f2a805df6c02 [file] [log] [blame]
/*
 * This file is part of the MicroPython project, http://micropython.org/
 *
 * The MIT License (MIT)
 *
 * Copyright (c) 2013, 2014 Damien P. George
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
Alexander Steffen299bc622017-06-29 23:14:58 +020026#ifndef MICROPY_INCLUDED_PY_OBJSTR_H
27#define MICROPY_INCLUDED_PY_OBJSTR_H
Damien George51dfcb42015-01-01 20:27:54 +000028
29#include "py/obj.h"
Andrew Leechf7f56d42022-08-10 14:13:17 +100030#include "py/objarray.h"
Damien George04b91472014-05-03 23:27:38 +010031
Paul Sokolovsky58676fc2014-04-14 01:45:06 +030032typedef struct _mp_obj_str_t {
33 mp_obj_base_t base;
Damien George82b35002022-08-11 16:34:02 +100034 size_t hash;
Paul Sokolovsky58676fc2014-04-14 01:45:06 +030035 // len == number of bytes used in data, alloc = len + 1 because (at the moment) we also append a null byte
Damien Georgec0d95002017-02-16 16:26:48 +110036 size_t len;
Paul Sokolovsky58676fc2014-04-14 01:45:06 +030037 const byte *data;
38} mp_obj_str_t;
39
// This static assert is used to ensure that mp_obj_str_t and mp_obj_array_t are compatible,
// meaning that their len and data/items entries are at the same offsets in the struct.
// This allows the same code to be used for str/bytes and bytearray.
// The macro expands to a single MP_STATIC_ASSERT(...) without a trailing semicolon,
// so the user supplies the semicolon at the point of use.
#define MP_STATIC_ASSERT_STR_ARRAY_COMPATIBLE \
    MP_STATIC_ASSERT(offsetof(mp_obj_str_t, len) == offsetof(mp_obj_array_t, len) \
    && offsetof(mp_obj_str_t, data) == offsetof(mp_obj_array_t, items))
46
// Define an mp_obj_str_t of type str named `obj_name`.
// `str` must be something for which sizeof() gives the length including the
// terminating null byte (e.g. a string literal or a char array), because the
// length is computed as sizeof(str) - 1.  The hash is left as 0 (invalid).
#define MP_DEFINE_STR_OBJ(obj_name, str) mp_obj_str_t obj_name = {{&mp_type_str}, 0, sizeof(str) - 1, (const byte *)(str)}
Damien George897fe0c2014-04-15 22:03:55 +010048
// use this macro to extract the string hash
// warning: the hash can be 0, meaning invalid, and must then be explicitly computed from the data
// The macro declares `size_t str_hash` in the enclosing scope, so it must be
// used where a declaration is allowed and `str_hash` must be a fresh name.
// `str_obj_in` is evaluated more than once.
#define GET_STR_HASH(str_obj_in, str_hash) \
    size_t str_hash; \
    if (mp_obj_is_qstr(str_obj_in)) { \
        str_hash = qstr_hash(MP_OBJ_QSTR_VALUE(str_obj_in)); \
    } else { \
        str_hash = ((mp_obj_str_t *)MP_OBJ_TO_PTR(str_obj_in))->hash; \
    }
Paul Sokolovsky97319122014-06-13 22:01:26 +030058
// use this macro to extract the string length
// The macro declares `size_t str_len` in the enclosing scope, so it must be
// used where a declaration is allowed and `str_len` must be a fresh name.
// `str_obj_in` is evaluated more than once.
#define GET_STR_LEN(str_obj_in, str_len) \
    size_t str_len; \
    if (mp_obj_is_qstr(str_obj_in)) { \
        str_len = qstr_len(MP_OBJ_QSTR_VALUE(str_obj_in)); \
    } else { \
        str_len = ((mp_obj_str_t *)MP_OBJ_TO_PTR(str_obj_in))->len; \
    }
Paul Sokolovsky97319122014-06-13 22:01:26 +030067
68// use this macro to extract the string data and length
Damien George4c0176d2019-12-27 23:15:52 +110069#if MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_C || MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_D
Damien Georgec3f64d92015-11-27 12:23:18 +000070const byte *mp_obj_str_get_data_no_check(mp_obj_t self_in, size_t *len);
Damien George04353cc2015-10-18 23:09:04 +010071#define GET_STR_DATA_LEN(str_obj_in, str_data, str_len) \
Damien Georgeb5986782022-08-10 14:09:22 +100072 size_t str_len; \
73 const byte *str_data = mp_obj_str_get_data_no_check(str_obj_in, &str_len);
Damien George04353cc2015-10-18 23:09:04 +010074#else
Paul Sokolovsky97319122014-06-13 22:01:26 +030075#define GET_STR_DATA_LEN(str_obj_in, str_data, str_len) \
Damien Georgeb5986782022-08-10 14:09:22 +100076 const byte *str_data; \
77 size_t str_len; \
78 if (mp_obj_is_qstr(str_obj_in)) { \
79 str_data = qstr_data(MP_OBJ_QSTR_VALUE(str_obj_in), &str_len); \
80 } else { \
Andrew Leechf7f56d42022-08-10 14:13:17 +100081 MP_STATIC_ASSERT_STR_ARRAY_COMPATIBLE; \
Damien Georgeb5986782022-08-10 14:09:22 +100082 str_len = ((mp_obj_str_t *)MP_OBJ_TO_PTR(str_obj_in))->len; \
83 str_data = ((mp_obj_str_t *)MP_OBJ_TO_PTR(str_obj_in))->data; \
84 }
Damien George04353cc2015-10-18 23:09:04 +010085#endif
Paul Sokolovsky97319122014-06-13 22:01:26 +030086
Damien George5b3f0b72016-01-03 15:55:55 +000087mp_obj_t mp_obj_str_make_new(const mp_obj_type_t *type_in, size_t n_args, size_t n_kw, const mp_obj_t *args);
Damien George999cedb2015-11-27 17:01:44 +000088void mp_str_print_json(const mp_print_t *print, const byte *str_data, size_t str_len);
Damien George4b72b3a2016-01-03 14:21:40 +000089mp_obj_t mp_obj_str_format(size_t n_args, const mp_obj_t *args, mp_map_t *kwargs);
90mp_obj_t mp_obj_str_split(size_t n_args, const mp_obj_t *args);
Jim Mussared6c3d8d32022-08-24 12:22:57 +100091mp_obj_t mp_obj_new_str_copy(const mp_obj_type_t *type, const byte *data, size_t len); // for type=str, input data must be valid utf-8
92mp_obj_t mp_obj_new_str_of_type(const mp_obj_type_t *type, const byte *data, size_t len); // for type=str, will check utf-8 (raises UnicodeError)
Paul Sokolovsky97319122014-06-13 22:01:26 +030093
Damien George58321dd2017-08-29 13:04:01 +100094mp_obj_t mp_obj_str_binary_op(mp_binary_op_t op, mp_obj_t lhs_in, mp_obj_t rhs_in);
Damien George4d917232014-08-30 14:28:06 +010095mp_int_t mp_obj_str_get_buffer(mp_obj_t self_in, mp_buffer_info_t *bufinfo, mp_uint_t flags);
Paul Sokolovsky97319122014-06-13 22:01:26 +030096
Jim Mussaredc44b3922022-10-07 11:06:43 +110097void mp_obj_str_set_data(mp_obj_str_t *str, const byte *data, size_t len);
98
Damien George999cedb2015-11-27 17:01:44 +000099const byte *str_index_to_ptr(const mp_obj_type_t *type, const byte *self_data, size_t self_len,
Damien George69661f32020-02-27 15:36:53 +1100100 mp_obj_t index, bool is_slice);
Damien Georgec0d95002017-02-16 16:26:48 +1100101const byte *find_subbytes(const byte *haystack, size_t hlen, const byte *needle, size_t nlen, int direction);
Paul Sokolovskyea2c9362014-06-15 00:35:09 +0300102
// Define an mp_obj_str_t of type bytes named `obj_name` that refers to `len`
// bytes starting at `target`.  The hash is left as 0 (invalid).
#define MP_DEFINE_BYTES_OBJ(obj_name, target, len) mp_obj_str_t obj_name = {{&mp_type_bytes}, 0, (len), (const byte *)(target)}
104
Jim Mussared28aaab92021-07-13 18:01:12 +1000105mp_obj_t mp_obj_bytes_hex(size_t n_args, const mp_obj_t *args, const mp_obj_type_t *type);
106mp_obj_t mp_obj_bytes_fromhex(mp_obj_t type_in, mp_obj_t data);
107
Andrew Leechf7f56d42022-08-10 14:13:17 +1000108extern const mp_obj_dict_t mp_obj_str_locals_dict;
109
Jim Mussared28aaab92021-07-13 18:01:12 +1000110#if MICROPY_PY_BUILTINS_MEMORYVIEW && MICROPY_PY_BUILTINS_BYTES_HEX
111extern const mp_obj_dict_t mp_obj_memoryview_locals_dict;
112#endif
113
Andrew Leechf7f56d42022-08-10 14:13:17 +1000114#if MICROPY_PY_BUILTINS_BYTEARRAY
115extern const mp_obj_dict_t mp_obj_bytearray_locals_dict;
116#endif
117
118#if MICROPY_PY_ARRAY
119extern const mp_obj_dict_t mp_obj_array_locals_dict;
120#endif
121
Alexander Steffen299bc622017-06-29 23:14:58 +0200122#endif // MICROPY_INCLUDED_PY_OBJSTR_H