Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 1 | /* |
Alexander Steffen | 55f3324 | 2017-06-30 09:22:17 +0200 | [diff] [blame] | 2 | * This file is part of the MicroPython project, http://micropython.org/ |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 3 | * |
| 4 | * The MIT License (MIT) |
| 5 | * |
| 6 | * Copyright (c) 2013, 2014 Damien P. George |
Paul Sokolovsky | 8fea833 | 2019-01-31 11:55:21 +0300 | [diff] [blame] | 7 | * Copyright (c) 2014-2016 Paul Sokolovsky |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 8 | * |
| 9 | * Permission is hereby granted, free of charge, to any person obtaining a copy |
| 10 | * of this software and associated documentation files (the "Software"), to deal |
| 11 | * in the Software without restriction, including without limitation the rights |
| 12 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
| 13 | * copies of the Software, and to permit persons to whom the Software is |
| 14 | * furnished to do so, subject to the following conditions: |
| 15 | * |
| 16 | * The above copyright notice and this permission notice shall be included in |
| 17 | * all copies or substantial portions of the Software. |
| 18 | * |
| 19 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 20 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 21 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| 22 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| 23 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| 24 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
| 25 | * THE SOFTWARE. |
| 26 | */ |
| 27 | |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 28 | #include <string.h> |
| 29 | #include <assert.h> |
| 30 | |
Damien George | 51dfcb4 | 2015-01-01 20:27:54 +0000 | [diff] [blame] | 31 | #include "py/objstr.h" |
| 32 | #include "py/objlist.h" |
Damien George | 51dfcb4 | 2015-01-01 20:27:54 +0000 | [diff] [blame] | 33 | #include "py/runtime.h" |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 34 | |
Paul Sokolovsky | 9731912 | 2014-06-13 22:01:26 +0300 | [diff] [blame] | 35 | #if MICROPY_PY_BUILTINS_STR_UNICODE |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 36 | |
// Forward declaration of the str iterator constructor: it is referenced by the
// mp_type_str definition below but only defined near the end of this file.
Angus Gratton | decf8e6 | 2024-02-27 15:32:29 +1100 | [diff] [blame] | 37 | static mp_obj_t mp_obj_new_str_iterator(mp_obj_t str, mp_obj_iter_buf_t *iter_buf); |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 38 | |
| 39 | /******************************************************************************/ |
| 40 | /* str */ |
| 41 | |
Angus Gratton | decf8e6 | 2024-02-27 15:32:29 +1100 | [diff] [blame] | 42 | static void uni_print_quoted(const mp_print_t *print, const byte *str_data, uint str_len) { |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 43 | // this escapes characters, but it will be very slow to print (calling print many times) |
| 44 | bool has_single_quote = false; |
| 45 | bool has_double_quote = false; |
| 46 | for (const byte *s = str_data, *top = str_data + str_len; !has_double_quote && s < top; s++) { |
| 47 | if (*s == '\'') { |
| 48 | has_single_quote = true; |
| 49 | } else if (*s == '"') { |
| 50 | has_double_quote = true; |
| 51 | } |
| 52 | } |
Damien George | 2e2e404 | 2015-03-19 00:21:29 +0000 | [diff] [blame] | 53 | unichar quote_char = '\''; |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 54 | if (has_single_quote && !has_double_quote) { |
| 55 | quote_char = '"'; |
| 56 | } |
Damien George | 7f9d1d6 | 2015-04-09 23:56:15 +0100 | [diff] [blame] | 57 | mp_printf(print, "%c", quote_char); |
Paul Sokolovsky | 00c904b | 2014-06-14 17:48:40 +0300 | [diff] [blame] | 58 | const byte *s = str_data, *top = str_data + str_len; |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 59 | while (s < top) { |
| 60 | unichar ch; |
Paul Sokolovsky | 86d3898 | 2014-06-13 23:00:15 +0300 | [diff] [blame] | 61 | ch = utf8_get_char(s); |
| 62 | s = utf8_next_char(s); |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 63 | if (ch == quote_char) { |
Damien George | 7f9d1d6 | 2015-04-09 23:56:15 +0100 | [diff] [blame] | 64 | mp_printf(print, "\\%c", quote_char); |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 65 | } else if (ch == '\\') { |
Damien George | 7f9d1d6 | 2015-04-09 23:56:15 +0100 | [diff] [blame] | 66 | mp_print_str(print, "\\\\"); |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 67 | } else if (32 <= ch && ch <= 126) { |
Damien George | 7f9d1d6 | 2015-04-09 23:56:15 +0100 | [diff] [blame] | 68 | mp_printf(print, "%c", ch); |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 69 | } else if (ch == '\n') { |
Damien George | 7f9d1d6 | 2015-04-09 23:56:15 +0100 | [diff] [blame] | 70 | mp_print_str(print, "\\n"); |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 71 | } else if (ch == '\r') { |
Damien George | 7f9d1d6 | 2015-04-09 23:56:15 +0100 | [diff] [blame] | 72 | mp_print_str(print, "\\r"); |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 73 | } else if (ch == '\t') { |
Damien George | 7f9d1d6 | 2015-04-09 23:56:15 +0100 | [diff] [blame] | 74 | mp_print_str(print, "\\t"); |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 75 | } else if (ch < 0x100) { |
Damien George | 7f9d1d6 | 2015-04-09 23:56:15 +0100 | [diff] [blame] | 76 | mp_printf(print, "\\x%02x", ch); |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 77 | } else if (ch < 0x10000) { |
Damien George | 7f9d1d6 | 2015-04-09 23:56:15 +0100 | [diff] [blame] | 78 | mp_printf(print, "\\u%04x", ch); |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 79 | } else { |
Damien George | 7f9d1d6 | 2015-04-09 23:56:15 +0100 | [diff] [blame] | 80 | mp_printf(print, "\\U%08x", ch); |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 81 | } |
| 82 | } |
Damien George | 7f9d1d6 | 2015-04-09 23:56:15 +0100 | [diff] [blame] | 83 | mp_printf(print, "%c", quote_char); |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 84 | } |
| 85 | |
Angus Gratton | decf8e6 | 2024-02-27 15:32:29 +1100 | [diff] [blame] | 86 | static void uni_print(const mp_print_t *print, mp_obj_t self_in, mp_print_kind_t kind) { |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 87 | GET_STR_DATA_LEN(self_in, str_data, str_len); |
Jim Mussared | f5f9edf | 2022-08-18 15:01:26 +1000 | [diff] [blame] | 88 | #if MICROPY_PY_JSON |
Damien George | 612045f | 2014-09-17 22:56:34 +0100 | [diff] [blame] | 89 | if (kind == PRINT_JSON) { |
Damien George | 7f9d1d6 | 2015-04-09 23:56:15 +0100 | [diff] [blame] | 90 | mp_str_print_json(print, str_data, str_len); |
Damien George | 612045f | 2014-09-17 22:56:34 +0100 | [diff] [blame] | 91 | return; |
| 92 | } |
| 93 | #endif |
Paul Sokolovsky | 86d3898 | 2014-06-13 23:00:15 +0300 | [diff] [blame] | 94 | if (kind == PRINT_STR) { |
Joris Peeraer | 5020b14 | 2020-10-22 10:38:03 +0200 | [diff] [blame] | 95 | print->print_strn(print->data, (const char *)str_data, str_len); |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 96 | } else { |
Damien George | 7f9d1d6 | 2015-04-09 23:56:15 +0100 | [diff] [blame] | 97 | uni_print_quoted(print, str_data, str_len); |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 98 | } |
| 99 | } |
| 100 | |
Angus Gratton | decf8e6 | 2024-02-27 15:32:29 +1100 | [diff] [blame] | 101 | static mp_obj_t uni_unary_op(mp_unary_op_t op, mp_obj_t self_in) { |
Paul Sokolovsky | e7f2b4c | 2014-06-13 23:37:18 +0300 | [diff] [blame] | 102 | GET_STR_DATA_LEN(self_in, str_data, str_len); |
| 103 | switch (op) { |
| 104 | case MP_UNARY_OP_BOOL: |
Paul Sokolovsky | 1b586f3 | 2015-10-11 12:09:43 +0300 | [diff] [blame] | 105 | return mp_obj_new_bool(str_len != 0); |
Paul Sokolovsky | e7f2b4c | 2014-06-13 23:37:18 +0300 | [diff] [blame] | 106 | case MP_UNARY_OP_LEN: |
Damien George | 19aee94 | 2018-02-14 18:19:22 +1100 | [diff] [blame] | 107 | return MP_OBJ_NEW_SMALL_INT(utf8_charlen(str_data, str_len)); |
Paul Sokolovsky | e7f2b4c | 2014-06-13 23:37:18 +0300 | [diff] [blame] | 108 | default: |
| 109 | return MP_OBJ_NULL; // op not supported |
| 110 | } |
| 111 | } |
| 112 | |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 113 | // Convert an index into a pointer to its lead byte. Out of bounds indexing will raise IndexError or |
| 114 | // be capped to the first/last character of the string, depending on is_slice. |
Damien George | 999cedb | 2015-11-27 17:01:44 +0000 | [diff] [blame] | 115 | const byte *str_index_to_ptr(const mp_obj_type_t *type, const byte *self_data, size_t self_len, |
Damien George | 69661f3 | 2020-02-27 15:36:53 +1100 | [diff] [blame] | 116 | mp_obj_t index, bool is_slice) { |
Paul Sokolovsky | 6af90b2 | 2016-07-25 14:43:04 +0300 | [diff] [blame] | 117 | // All str functions also handle bytes objects, and they call str_index_to_ptr(), |
| 118 | // so it must handle bytes. |
Andrew Leech | f7f56d4 | 2022-08-10 14:13:17 +1000 | [diff] [blame] | 119 | if (type == &mp_type_bytes |
| 120 | #if MICROPY_PY_BUILTINS_BYTEARRAY |
| 121 | || type == &mp_type_bytearray |
| 122 | #endif |
| 123 | ) { |
Paul Sokolovsky | 6af90b2 | 2016-07-25 14:43:04 +0300 | [diff] [blame] | 124 | // Taken from objstr.c:str_index_to_ptr() |
Damien George | c88cfe1 | 2017-03-23 16:17:40 +1100 | [diff] [blame] | 125 | size_t index_val = mp_get_index(type, self_len, index, is_slice); |
Paul Sokolovsky | 6af90b2 | 2016-07-25 14:43:04 +0300 | [diff] [blame] | 126 | return self_data + index_val; |
| 127 | } |
| 128 | |
Damien George | 40f3c02 | 2014-07-03 13:25:24 +0100 | [diff] [blame] | 129 | mp_int_t i; |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 130 | // Copied from mp_get_index; I don't want bounds checking, just give me |
| 131 | // the integer as-is. (I can't bounds-check without scanning the whole |
| 132 | // string; an out-of-bounds index will be caught in the loops below.) |
Damien George | eee1e88 | 2019-01-30 18:49:52 +1100 | [diff] [blame] | 133 | if (mp_obj_is_small_int(index)) { |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 134 | i = MP_OBJ_SMALL_INT_VALUE(index); |
| 135 | } else if (!mp_obj_get_int_maybe(index, &i)) { |
Jim Mussared | def76fe | 2020-03-02 22:35:22 +1100 | [diff] [blame] | 136 | mp_raise_msg_varg(&mp_type_TypeError, MP_ERROR_TEXT("string indices must be integers, not %s"), mp_obj_get_type_str(index)); |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 137 | } |
Paul Sokolovsky | ea2c936 | 2014-06-15 00:35:09 +0300 | [diff] [blame] | 138 | const byte *s, *top = self_data + self_len; |
Damien George | 69661f3 | 2020-02-27 15:36:53 +1100 | [diff] [blame] | 139 | if (i < 0) { |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 140 | // Negative indexing is performed by counting from the end of the string. |
| 141 | for (s = top - 1; i; --s) { |
| 142 | if (s < self_data) { |
| 143 | if (is_slice) { |
| 144 | return self_data; |
| 145 | } |
Jim Mussared | def76fe | 2020-03-02 22:35:22 +1100 | [diff] [blame] | 146 | mp_raise_msg(&mp_type_IndexError, MP_ERROR_TEXT("string index out of range")); |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 147 | } |
| 148 | if (!UTF8_IS_CONT(*s)) { |
| 149 | ++i; |
| 150 | } |
| 151 | } |
| 152 | ++s; |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 153 | } else { |
| 154 | // Positive indexing, correspondingly, counts from the start of the string. |
| 155 | // It's assumed that negative indexing will generally be used with small |
| 156 | // absolute values (eg str[-1], not str[-1000000]), which means it'll be |
| 157 | // more efficient this way. |
Paul Sokolovsky | ed1c194 | 2016-07-25 19:02:51 +0300 | [diff] [blame] | 158 | s = self_data; |
| 159 | while (1) { |
| 160 | // First check out-of-bounds |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 161 | if (s >= top) { |
| 162 | if (is_slice) { |
| 163 | return top; |
| 164 | } |
Jim Mussared | def76fe | 2020-03-02 22:35:22 +1100 | [diff] [blame] | 165 | mp_raise_msg(&mp_type_IndexError, MP_ERROR_TEXT("string index out of range")); |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 166 | } |
Paul Sokolovsky | ed1c194 | 2016-07-25 19:02:51 +0300 | [diff] [blame] | 167 | // Then check completion |
| 168 | if (i-- == 0) { |
| 169 | break; |
| 170 | } |
| 171 | // Then skip UTF-8 char |
| 172 | ++s; |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 173 | while (UTF8_IS_CONT(*s)) { |
| 174 | ++s; |
| 175 | } |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 176 | } |
| 177 | } |
| 178 | return s; |
| 179 | } |
| 180 | |
Angus Gratton | decf8e6 | 2024-02-27 15:32:29 +1100 | [diff] [blame] | 181 | static mp_obj_t str_subscr(mp_obj_t self_in, mp_obj_t index, mp_obj_t value) { |
Damien George | bfbd944 | 2020-01-09 11:01:14 +1100 | [diff] [blame] | 182 | const mp_obj_type_t *type = mp_obj_get_type(self_in); |
Damien George | 0528c5a | 2015-04-04 19:42:03 +0100 | [diff] [blame] | 183 | assert(type == &mp_type_str); |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 184 | GET_STR_DATA_LEN(self_in, self_data, self_len); |
| 185 | if (value == MP_OBJ_SENTINEL) { |
| 186 | // load |
Damien George | 69661f3 | 2020-02-27 15:36:53 +1100 | [diff] [blame] | 187 | #if MICROPY_PY_BUILTINS_SLICE |
Damien George | eee1e88 | 2019-01-30 18:49:52 +1100 | [diff] [blame] | 188 | if (mp_obj_is_type(index, &mp_type_slice)) { |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 189 | mp_obj_t ostart, ostop, ostep; |
Nicko van Someren | 1070984 | 2019-11-20 18:53:07 -0700 | [diff] [blame] | 190 | mp_obj_slice_t *slice = MP_OBJ_TO_PTR(index); |
| 191 | ostart = slice->start; |
| 192 | ostop = slice->stop; |
| 193 | ostep = slice->step; |
| 194 | |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 195 | if (ostep != mp_const_none && ostep != MP_OBJ_NEW_SMALL_INT(1)) { |
Jim Mussared | def76fe | 2020-03-02 22:35:22 +1100 | [diff] [blame] | 196 | mp_raise_NotImplementedError(MP_ERROR_TEXT("only slices with step=1 (aka None) are supported")); |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 197 | } |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 198 | |
Paul Sokolovsky | ea2c936 | 2014-06-15 00:35:09 +0300 | [diff] [blame] | 199 | const byte *pstart, *pstop; |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 200 | if (ostart != mp_const_none) { |
Paul Sokolovsky | ea2c936 | 2014-06-15 00:35:09 +0300 | [diff] [blame] | 201 | pstart = str_index_to_ptr(type, self_data, self_len, ostart, true); |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 202 | } else { |
Paul Sokolovsky | ea2c936 | 2014-06-15 00:35:09 +0300 | [diff] [blame] | 203 | pstart = self_data; |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 204 | } |
| 205 | if (ostop != mp_const_none) { |
| 206 | // pstop will point just after the stop character. This depends on |
| 207 | // the \0 at the end of the string. |
Paul Sokolovsky | ea2c936 | 2014-06-15 00:35:09 +0300 | [diff] [blame] | 208 | pstop = str_index_to_ptr(type, self_data, self_len, ostop, true); |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 209 | } else { |
Paul Sokolovsky | ea2c936 | 2014-06-15 00:35:09 +0300 | [diff] [blame] | 210 | pstop = self_data + self_len; |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 211 | } |
| 212 | if (pstop < pstart) { |
| 213 | return MP_OBJ_NEW_QSTR(MP_QSTR_); |
| 214 | } |
| 215 | return mp_obj_new_str_of_type(type, (const byte *)pstart, pstop - pstart); |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 216 | } |
Damien George | 69661f3 | 2020-02-27 15:36:53 +1100 | [diff] [blame] | 217 | #endif |
Paul Sokolovsky | ea2c936 | 2014-06-15 00:35:09 +0300 | [diff] [blame] | 218 | const byte *s = str_index_to_ptr(type, self_data, self_len, index, false); |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 219 | int len = 1; |
| 220 | if (UTF8_IS_NONASCII(*s)) { |
| 221 | // Count the number of 1 bits (after the first) |
| 222 | for (char mask = 0x40; *s & mask; mask >>= 1) { |
| 223 | ++len; |
| 224 | } |
| 225 | } |
Damien George | 69661f3 | 2020-02-27 15:36:53 +1100 | [diff] [blame] | 226 | return mp_obj_new_str_via_qstr((const char *)s, len); // This will create a one-character string |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 227 | } else { |
| 228 | return MP_OBJ_NULL; // op not supported |
| 229 | } |
| 230 | } |
| 231 | |
// Type object for str in this (MICROPY_PY_BUILTINS_STR_UNICODE) build.
// The print, unary_op, subscr and iter slots use the character-aware
// functions defined in this file (uni_print, uni_unary_op, str_subscr,
// mp_obj_new_str_iterator); make_new, binary_op, buffer and locals_dict refer
// to mp_obj_str_* names that are not defined in this file.
Jim Mussared | 662b976 | 2021-07-14 14:38:38 +1000 | [diff] [blame] | 232 | MP_DEFINE_CONST_OBJ_TYPE( |
| 233 | mp_type_str, |
| 234 | MP_QSTR_str, |
Jim Mussared | 6da41b5 | 2022-09-16 23:57:38 +1000 | [diff] [blame] | 235 | MP_TYPE_FLAG_ITER_IS_GETITER, |
Jim Mussared | 94beeab | 2022-09-17 00:31:23 +1000 | [diff] [blame] | 236 | make_new, mp_obj_str_make_new, |
Jim Mussared | 662b976 | 2021-07-14 14:38:38 +1000 | [diff] [blame] | 237 | print, uni_print, |
| 238 | unary_op, uni_unary_op, |
| 239 | binary_op, mp_obj_str_binary_op, |
| 240 | subscr, str_subscr, |
Jim Mussared | 6da41b5 | 2022-09-16 23:57:38 +1000 | [diff] [blame] | 241 | iter, mp_obj_new_str_iterator, |
Jim Mussared | 662b976 | 2021-07-14 14:38:38 +1000 | [diff] [blame] | 242 | buffer, mp_obj_str_get_buffer, |
Jim Mussared | 9dce827 | 2022-06-24 16:27:46 +1000 | [diff] [blame] | 243 | locals_dict, &mp_obj_str_locals_dict |
Jim Mussared | 662b976 | 2021-07-14 14:38:38 +1000 | [diff] [blame] | 244 | ); |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 245 | |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 246 | /******************************************************************************/ |
| 247 | /* str iterator */ |
| 248 | |
| 249 | typedef struct _mp_obj_str_it_t { |
| 250 | mp_obj_base_t base; |
Damien George | 8212d97 | 2016-01-03 16:27:55 +0000 | [diff] [blame] | 251 | mp_fun_1_t iternext; |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 252 | mp_obj_t str; |
Damien George | c0d9500 | 2017-02-16 16:26:48 +1100 | [diff] [blame] | 253 | size_t cur; |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 254 | } mp_obj_str_it_t; |
| 255 | |
Angus Gratton | decf8e6 | 2024-02-27 15:32:29 +1100 | [diff] [blame] | 256 | static mp_obj_t str_it_iternext(mp_obj_t self_in) { |
Damien George | 999cedb | 2015-11-27 17:01:44 +0000 | [diff] [blame] | 257 | mp_obj_str_it_t *self = MP_OBJ_TO_PTR(self_in); |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 258 | GET_STR_DATA_LEN(self->str, str, len); |
| 259 | if (self->cur < len) { |
Paul Sokolovsky | 79b7fe2 | 2014-06-14 02:07:25 +0300 | [diff] [blame] | 260 | const byte *cur = str + self->cur; |
| 261 | const byte *end = utf8_next_char(str + self->cur); |
Damien George | 69661f3 | 2020-02-27 15:36:53 +1100 | [diff] [blame] | 262 | mp_obj_t o_out = mp_obj_new_str_via_qstr((const char *)cur, end - cur); |
Paul Sokolovsky | 79b7fe2 | 2014-06-14 02:07:25 +0300 | [diff] [blame] | 263 | self->cur += end - cur; |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 264 | return o_out; |
| 265 | } else { |
| 266 | return MP_OBJ_STOP_ITERATION; |
| 267 | } |
| 268 | } |
| 269 | |
Angus Gratton | decf8e6 | 2024-02-27 15:32:29 +1100 | [diff] [blame] | 270 | static mp_obj_t mp_obj_new_str_iterator(mp_obj_t str, mp_obj_iter_buf_t *iter_buf) { |
Damien George | ae8d867 | 2016-01-09 23:14:54 +0000 | [diff] [blame] | 271 | assert(sizeof(mp_obj_str_it_t) <= sizeof(mp_obj_iter_buf_t)); |
Damien George | 69661f3 | 2020-02-27 15:36:53 +1100 | [diff] [blame] | 272 | mp_obj_str_it_t *o = (mp_obj_str_it_t *)iter_buf; |
Damien George | 8212d97 | 2016-01-03 16:27:55 +0000 | [diff] [blame] | 273 | o->base.type = &mp_type_polymorph_iter; |
| 274 | o->iternext = str_it_iternext; |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 275 | o->str = str; |
| 276 | o->cur = 0; |
Damien George | 999cedb | 2015-11-27 17:01:44 +0000 | [diff] [blame] | 277 | return MP_OBJ_FROM_PTR(o); |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 278 | } |
| 279 | |
Paul Sokolovsky | 9731912 | 2014-06-13 22:01:26 +0300 | [diff] [blame] | 280 | #endif // MICROPY_PY_BUILTINS_STR_UNICODE |