Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 1 | /* |
Alexander Steffen | 55f3324 | 2017-06-30 09:22:17 +0200 | [diff] [blame] | 2 | * This file is part of the MicroPython project, http://micropython.org/ |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 3 | * |
| 4 | * The MIT License (MIT) |
| 5 | * |
| 6 | * Copyright (c) 2013, 2014 Damien P. George |
| 7 | * Copyright (c) 2014 Paul Sokolovsky |
| 8 | * |
| 9 | * Permission is hereby granted, free of charge, to any person obtaining a copy |
| 10 | * of this software and associated documentation files (the "Software"), to deal |
| 11 | * in the Software without restriction, including without limitation the rights |
| 12 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
| 13 | * copies of the Software, and to permit persons to whom the Software is |
| 14 | * furnished to do so, subject to the following conditions: |
| 15 | * |
| 16 | * The above copyright notice and this permission notice shall be included in |
| 17 | * all copies or substantial portions of the Software. |
| 18 | * |
| 19 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 20 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 21 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| 22 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| 23 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| 24 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
| 25 | * THE SOFTWARE. |
| 26 | */ |
| 27 | |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 28 | #include <string.h> |
| 29 | #include <assert.h> |
| 30 | |
Damien George | 51dfcb4 | 2015-01-01 20:27:54 +0000 | [diff] [blame] | 31 | #include "py/objstr.h" |
| 32 | #include "py/objlist.h" |
Damien George | 51dfcb4 | 2015-01-01 20:27:54 +0000 | [diff] [blame] | 33 | #include "py/runtime.h" |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 34 | |
Paul Sokolovsky | 9731912 | 2014-06-13 22:01:26 +0300 | [diff] [blame] | 35 | #if MICROPY_PY_BUILTINS_STR_UNICODE |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 36 | |
Damien George | ae8d867 | 2016-01-09 23:14:54 +0000 | [diff] [blame] | 37 | STATIC mp_obj_t mp_obj_new_str_iterator(mp_obj_t str, mp_obj_iter_buf_t *iter_buf); |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 38 | |
| 39 | /******************************************************************************/ |
| 40 | /* str */ |
| 41 | |
Damien George | 7f9d1d6 | 2015-04-09 23:56:15 +0100 | [diff] [blame] | 42 | STATIC void uni_print_quoted(const mp_print_t *print, const byte *str_data, uint str_len) { |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 43 | // this escapes characters, but it will be very slow to print (calling print many times) |
| 44 | bool has_single_quote = false; |
| 45 | bool has_double_quote = false; |
| 46 | for (const byte *s = str_data, *top = str_data + str_len; !has_double_quote && s < top; s++) { |
| 47 | if (*s == '\'') { |
| 48 | has_single_quote = true; |
| 49 | } else if (*s == '"') { |
| 50 | has_double_quote = true; |
| 51 | } |
| 52 | } |
Damien George | 2e2e404 | 2015-03-19 00:21:29 +0000 | [diff] [blame] | 53 | unichar quote_char = '\''; |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 54 | if (has_single_quote && !has_double_quote) { |
| 55 | quote_char = '"'; |
| 56 | } |
Damien George | 7f9d1d6 | 2015-04-09 23:56:15 +0100 | [diff] [blame] | 57 | mp_printf(print, "%c", quote_char); |
Paul Sokolovsky | 00c904b | 2014-06-14 17:48:40 +0300 | [diff] [blame] | 58 | const byte *s = str_data, *top = str_data + str_len; |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 59 | while (s < top) { |
| 60 | unichar ch; |
Paul Sokolovsky | 86d3898 | 2014-06-13 23:00:15 +0300 | [diff] [blame] | 61 | ch = utf8_get_char(s); |
| 62 | s = utf8_next_char(s); |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 63 | if (ch == quote_char) { |
Damien George | 7f9d1d6 | 2015-04-09 23:56:15 +0100 | [diff] [blame] | 64 | mp_printf(print, "\\%c", quote_char); |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 65 | } else if (ch == '\\') { |
Damien George | 7f9d1d6 | 2015-04-09 23:56:15 +0100 | [diff] [blame] | 66 | mp_print_str(print, "\\\\"); |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 67 | } else if (32 <= ch && ch <= 126) { |
Damien George | 7f9d1d6 | 2015-04-09 23:56:15 +0100 | [diff] [blame] | 68 | mp_printf(print, "%c", ch); |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 69 | } else if (ch == '\n') { |
Damien George | 7f9d1d6 | 2015-04-09 23:56:15 +0100 | [diff] [blame] | 70 | mp_print_str(print, "\\n"); |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 71 | } else if (ch == '\r') { |
Damien George | 7f9d1d6 | 2015-04-09 23:56:15 +0100 | [diff] [blame] | 72 | mp_print_str(print, "\\r"); |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 73 | } else if (ch == '\t') { |
Damien George | 7f9d1d6 | 2015-04-09 23:56:15 +0100 | [diff] [blame] | 74 | mp_print_str(print, "\\t"); |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 75 | } else if (ch < 0x100) { |
Damien George | 7f9d1d6 | 2015-04-09 23:56:15 +0100 | [diff] [blame] | 76 | mp_printf(print, "\\x%02x", ch); |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 77 | } else if (ch < 0x10000) { |
Damien George | 7f9d1d6 | 2015-04-09 23:56:15 +0100 | [diff] [blame] | 78 | mp_printf(print, "\\u%04x", ch); |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 79 | } else { |
Damien George | 7f9d1d6 | 2015-04-09 23:56:15 +0100 | [diff] [blame] | 80 | mp_printf(print, "\\U%08x", ch); |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 81 | } |
| 82 | } |
Damien George | 7f9d1d6 | 2015-04-09 23:56:15 +0100 | [diff] [blame] | 83 | mp_printf(print, "%c", quote_char); |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 84 | } |
| 85 | |
Damien George | 7f9d1d6 | 2015-04-09 23:56:15 +0100 | [diff] [blame] | 86 | STATIC void uni_print(const mp_print_t *print, mp_obj_t self_in, mp_print_kind_t kind) { |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 87 | GET_STR_DATA_LEN(self_in, str_data, str_len); |
Damien George | 612045f | 2014-09-17 22:56:34 +0100 | [diff] [blame] | 88 | #if MICROPY_PY_UJSON |
| 89 | if (kind == PRINT_JSON) { |
Damien George | 7f9d1d6 | 2015-04-09 23:56:15 +0100 | [diff] [blame] | 90 | mp_str_print_json(print, str_data, str_len); |
Damien George | 612045f | 2014-09-17 22:56:34 +0100 | [diff] [blame] | 91 | return; |
| 92 | } |
| 93 | #endif |
Paul Sokolovsky | 86d3898 | 2014-06-13 23:00:15 +0300 | [diff] [blame] | 94 | if (kind == PRINT_STR) { |
Damien George | 7f9d1d6 | 2015-04-09 23:56:15 +0100 | [diff] [blame] | 95 | mp_printf(print, "%.*s", str_len, str_data); |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 96 | } else { |
Damien George | 7f9d1d6 | 2015-04-09 23:56:15 +0100 | [diff] [blame] | 97 | uni_print_quoted(print, str_data, str_len); |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 98 | } |
| 99 | } |
| 100 | |
Damien George | 58321dd | 2017-08-29 13:04:01 +1000 | [diff] [blame] | 101 | STATIC mp_obj_t uni_unary_op(mp_unary_op_t op, mp_obj_t self_in) { |
Paul Sokolovsky | e7f2b4c | 2014-06-13 23:37:18 +0300 | [diff] [blame] | 102 | GET_STR_DATA_LEN(self_in, str_data, str_len); |
| 103 | switch (op) { |
| 104 | case MP_UNARY_OP_BOOL: |
Paul Sokolovsky | 1b586f3 | 2015-10-11 12:09:43 +0300 | [diff] [blame] | 105 | return mp_obj_new_bool(str_len != 0); |
Paul Sokolovsky | e7f2b4c | 2014-06-13 23:37:18 +0300 | [diff] [blame] | 106 | case MP_UNARY_OP_LEN: |
Paul Sokolovsky | 9e215fa | 2014-06-28 23:14:30 +0300 | [diff] [blame] | 107 | return MP_OBJ_NEW_SMALL_INT(unichar_charlen((const char *)str_data, str_len)); |
Paul Sokolovsky | e7f2b4c | 2014-06-13 23:37:18 +0300 | [diff] [blame] | 108 | default: |
| 109 | return MP_OBJ_NULL; // op not supported |
| 110 | } |
| 111 | } |
| 112 | |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 113 | // Convert an index into a pointer to its lead byte. Out of bounds indexing will raise IndexError or |
| 114 | // be capped to the first/last character of the string, depending on is_slice. |
Damien George | 999cedb | 2015-11-27 17:01:44 +0000 | [diff] [blame] | 115 | const byte *str_index_to_ptr(const mp_obj_type_t *type, const byte *self_data, size_t self_len, |
Paul Sokolovsky | ea2c936 | 2014-06-15 00:35:09 +0300 | [diff] [blame] | 116 | mp_obj_t index, bool is_slice) { |
Paul Sokolovsky | 6af90b2 | 2016-07-25 14:43:04 +0300 | [diff] [blame] | 117 | // All str functions also handle bytes objects, and they call str_index_to_ptr(), |
| 118 | // so it must handle bytes. |
| 119 | if (type == &mp_type_bytes) { |
| 120 | // Taken from objstr.c:str_index_to_ptr() |
Damien George | c88cfe1 | 2017-03-23 16:17:40 +1100 | [diff] [blame] | 121 | size_t index_val = mp_get_index(type, self_len, index, is_slice); |
Paul Sokolovsky | 6af90b2 | 2016-07-25 14:43:04 +0300 | [diff] [blame] | 122 | return self_data + index_val; |
| 123 | } |
| 124 | |
Damien George | 40f3c02 | 2014-07-03 13:25:24 +0100 | [diff] [blame] | 125 | mp_int_t i; |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 126 | // Copied from mp_get_index; I don't want bounds checking, just give me |
| 127 | // the integer as-is. (I can't bounds-check without scanning the whole |
| 128 | // string; an out-of-bounds index will be caught in the loops below.) |
| 129 | if (MP_OBJ_IS_SMALL_INT(index)) { |
| 130 | i = MP_OBJ_SMALL_INT_VALUE(index); |
| 131 | } else if (!mp_obj_get_int_maybe(index, &i)) { |
| 132 | nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_TypeError, "string indices must be integers, not %s", mp_obj_get_type_str(index))); |
| 133 | } |
Paul Sokolovsky | ea2c936 | 2014-06-15 00:35:09 +0300 | [diff] [blame] | 134 | const byte *s, *top = self_data + self_len; |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 135 | if (i < 0) |
| 136 | { |
| 137 | // Negative indexing is performed by counting from the end of the string. |
| 138 | for (s = top - 1; i; --s) { |
| 139 | if (s < self_data) { |
| 140 | if (is_slice) { |
| 141 | return self_data; |
| 142 | } |
Damien George | 48d867b | 2017-06-15 11:54:41 +1000 | [diff] [blame] | 143 | mp_raise_msg(&mp_type_IndexError, "string index out of range"); |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 144 | } |
| 145 | if (!UTF8_IS_CONT(*s)) { |
| 146 | ++i; |
| 147 | } |
| 148 | } |
| 149 | ++s; |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 150 | } else { |
| 151 | // Positive indexing, correspondingly, counts from the start of the string. |
| 152 | // It's assumed that negative indexing will generally be used with small |
| 153 | // absolute values (eg str[-1], not str[-1000000]), which means it'll be |
| 154 | // more efficient this way. |
Paul Sokolovsky | ed1c194 | 2016-07-25 19:02:51 +0300 | [diff] [blame] | 155 | s = self_data; |
| 156 | while (1) { |
| 157 | // First check out-of-bounds |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 158 | if (s >= top) { |
| 159 | if (is_slice) { |
| 160 | return top; |
| 161 | } |
Damien George | 48d867b | 2017-06-15 11:54:41 +1000 | [diff] [blame] | 162 | mp_raise_msg(&mp_type_IndexError, "string index out of range"); |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 163 | } |
Paul Sokolovsky | ed1c194 | 2016-07-25 19:02:51 +0300 | [diff] [blame] | 164 | // Then check completion |
| 165 | if (i-- == 0) { |
| 166 | break; |
| 167 | } |
| 168 | // Then skip UTF-8 char |
| 169 | ++s; |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 170 | while (UTF8_IS_CONT(*s)) { |
| 171 | ++s; |
| 172 | } |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 173 | } |
| 174 | } |
| 175 | return s; |
| 176 | } |
| 177 | |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 178 | STATIC mp_obj_t str_subscr(mp_obj_t self_in, mp_obj_t index, mp_obj_t value) { |
| 179 | mp_obj_type_t *type = mp_obj_get_type(self_in); |
Damien George | 0528c5a | 2015-04-04 19:42:03 +0100 | [diff] [blame] | 180 | assert(type == &mp_type_str); |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 181 | GET_STR_DATA_LEN(self_in, self_data, self_len); |
| 182 | if (value == MP_OBJ_SENTINEL) { |
| 183 | // load |
| 184 | #if MICROPY_PY_BUILTINS_SLICE |
| 185 | if (MP_OBJ_IS_TYPE(index, &mp_type_slice)) { |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 186 | mp_obj_t ostart, ostop, ostep; |
| 187 | mp_obj_slice_get(index, &ostart, &ostop, &ostep); |
| 188 | if (ostep != mp_const_none && ostep != MP_OBJ_NEW_SMALL_INT(1)) { |
Javier Candeira | 35a1fea | 2017-08-09 14:40:45 +1000 | [diff] [blame] | 189 | mp_raise_NotImplementedError("only slices with step=1 (aka None) are supported"); |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 190 | } |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 191 | |
Paul Sokolovsky | ea2c936 | 2014-06-15 00:35:09 +0300 | [diff] [blame] | 192 | const byte *pstart, *pstop; |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 193 | if (ostart != mp_const_none) { |
Paul Sokolovsky | ea2c936 | 2014-06-15 00:35:09 +0300 | [diff] [blame] | 194 | pstart = str_index_to_ptr(type, self_data, self_len, ostart, true); |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 195 | } else { |
Paul Sokolovsky | ea2c936 | 2014-06-15 00:35:09 +0300 | [diff] [blame] | 196 | pstart = self_data; |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 197 | } |
| 198 | if (ostop != mp_const_none) { |
| 199 | // pstop will point just after the stop character. This depends on |
| 200 | // the \0 at the end of the string. |
Paul Sokolovsky | ea2c936 | 2014-06-15 00:35:09 +0300 | [diff] [blame] | 201 | pstop = str_index_to_ptr(type, self_data, self_len, ostop, true); |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 202 | } else { |
Paul Sokolovsky | ea2c936 | 2014-06-15 00:35:09 +0300 | [diff] [blame] | 203 | pstop = self_data + self_len; |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 204 | } |
| 205 | if (pstop < pstart) { |
| 206 | return MP_OBJ_NEW_QSTR(MP_QSTR_); |
| 207 | } |
| 208 | return mp_obj_new_str_of_type(type, (const byte *)pstart, pstop - pstart); |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 209 | } |
| 210 | #endif |
Paul Sokolovsky | ea2c936 | 2014-06-15 00:35:09 +0300 | [diff] [blame] | 211 | const byte *s = str_index_to_ptr(type, self_data, self_len, index, false); |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 212 | int len = 1; |
| 213 | if (UTF8_IS_NONASCII(*s)) { |
| 214 | // Count the number of 1 bits (after the first) |
| 215 | for (char mask = 0x40; *s & mask; mask >>= 1) { |
| 216 | ++len; |
| 217 | } |
| 218 | } |
Damien George | 4601759 | 2017-11-16 13:17:51 +1100 | [diff] [blame] | 219 | return mp_obj_new_str_via_qstr((const char*)s, len); // This will create a one-character string |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 220 | } else { |
| 221 | return MP_OBJ_NULL; // op not supported |
| 222 | } |
| 223 | } |
| 224 | |
// Method table for the str type: each entry maps a method-name qstr to the
// function object implementing it. The str_*_obj function objects are not
// defined in this file. Entries guarded by a MICROPY_* option are only
// present when that option is enabled.
STATIC const mp_rom_map_elem_t struni_locals_dict_table[] = {
#if MICROPY_CPYTHON_COMPAT
    { MP_ROM_QSTR(MP_QSTR_encode), MP_ROM_PTR(&str_encode_obj) },
#endif
    { MP_ROM_QSTR(MP_QSTR_find), MP_ROM_PTR(&str_find_obj) },
    { MP_ROM_QSTR(MP_QSTR_rfind), MP_ROM_PTR(&str_rfind_obj) },
    { MP_ROM_QSTR(MP_QSTR_index), MP_ROM_PTR(&str_index_obj) },
    { MP_ROM_QSTR(MP_QSTR_rindex), MP_ROM_PTR(&str_rindex_obj) },
    { MP_ROM_QSTR(MP_QSTR_join), MP_ROM_PTR(&str_join_obj) },
    { MP_ROM_QSTR(MP_QSTR_split), MP_ROM_PTR(&str_split_obj) },
#if MICROPY_PY_BUILTINS_STR_SPLITLINES
    { MP_ROM_QSTR(MP_QSTR_splitlines), MP_ROM_PTR(&str_splitlines_obj) },
#endif
    { MP_ROM_QSTR(MP_QSTR_rsplit), MP_ROM_PTR(&str_rsplit_obj) },
    { MP_ROM_QSTR(MP_QSTR_startswith), MP_ROM_PTR(&str_startswith_obj) },
    { MP_ROM_QSTR(MP_QSTR_endswith), MP_ROM_PTR(&str_endswith_obj) },
    { MP_ROM_QSTR(MP_QSTR_strip), MP_ROM_PTR(&str_strip_obj) },
    { MP_ROM_QSTR(MP_QSTR_lstrip), MP_ROM_PTR(&str_lstrip_obj) },
    { MP_ROM_QSTR(MP_QSTR_rstrip), MP_ROM_PTR(&str_rstrip_obj) },
    { MP_ROM_QSTR(MP_QSTR_format), MP_ROM_PTR(&str_format_obj) },
    { MP_ROM_QSTR(MP_QSTR_replace), MP_ROM_PTR(&str_replace_obj) },
    { MP_ROM_QSTR(MP_QSTR_count), MP_ROM_PTR(&str_count_obj) },
#if MICROPY_PY_BUILTINS_STR_PARTITION
    { MP_ROM_QSTR(MP_QSTR_partition), MP_ROM_PTR(&str_partition_obj) },
    { MP_ROM_QSTR(MP_QSTR_rpartition), MP_ROM_PTR(&str_rpartition_obj) },
#endif
#if MICROPY_PY_BUILTINS_STR_CENTER
    { MP_ROM_QSTR(MP_QSTR_center), MP_ROM_PTR(&str_center_obj) },
#endif
    { MP_ROM_QSTR(MP_QSTR_lower), MP_ROM_PTR(&str_lower_obj) },
    { MP_ROM_QSTR(MP_QSTR_upper), MP_ROM_PTR(&str_upper_obj) },
    { MP_ROM_QSTR(MP_QSTR_isspace), MP_ROM_PTR(&str_isspace_obj) },
    { MP_ROM_QSTR(MP_QSTR_isalpha), MP_ROM_PTR(&str_isalpha_obj) },
    { MP_ROM_QSTR(MP_QSTR_isdigit), MP_ROM_PTR(&str_isdigit_obj) },
    { MP_ROM_QSTR(MP_QSTR_isupper), MP_ROM_PTR(&str_isupper_obj) },
    { MP_ROM_QSTR(MP_QSTR_islower), MP_ROM_PTR(&str_islower_obj) },
};
| 262 | |
// Define struni_locals_dict from struni_locals_dict_table; it is installed as
// the locals_dict of mp_type_str.
STATIC MP_DEFINE_CONST_DICT(struni_locals_dict, struni_locals_dict_table);
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 264 | |
// Type object for str when MICROPY_PY_BUILTINS_STR_UNICODE is enabled.
// Printing, unary ops, subscripting and iteration use the handlers defined in
// this file; construction, binary ops and buffer access use mp_obj_str_*
// handlers that are not defined in this file.
const mp_obj_type_t mp_type_str = {
    { &mp_type_type },
    .name = MP_QSTR_str,
    .print = uni_print,
    .make_new = mp_obj_str_make_new,
    .unary_op = uni_unary_op,
    .binary_op = mp_obj_str_binary_op,
    .subscr = str_subscr,
    .getiter = mp_obj_new_str_iterator,
    .buffer_p = { .get_buffer = mp_obj_str_get_buffer },
    .locals_dict = (mp_obj_dict_t*)&struni_locals_dict, // str methods table
};
| 277 | |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 278 | /******************************************************************************/ |
| 279 | /* str iterator */ |
| 280 | |
// State of an iterator over the characters of a str object.
typedef struct _mp_obj_str_it_t {
    mp_obj_base_t base;   // object header; type is set to mp_type_polymorph_iter
    mp_fun_1_t iternext;  // function producing the next item (str_it_iternext)
    mp_obj_t str;         // the string being iterated over
    size_t cur;           // byte offset into the string data of the next character
} mp_obj_str_it_t;
| 287 | |
| 288 | STATIC mp_obj_t str_it_iternext(mp_obj_t self_in) { |
Damien George | 999cedb | 2015-11-27 17:01:44 +0000 | [diff] [blame] | 289 | mp_obj_str_it_t *self = MP_OBJ_TO_PTR(self_in); |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 290 | GET_STR_DATA_LEN(self->str, str, len); |
| 291 | if (self->cur < len) { |
Paul Sokolovsky | 79b7fe2 | 2014-06-14 02:07:25 +0300 | [diff] [blame] | 292 | const byte *cur = str + self->cur; |
| 293 | const byte *end = utf8_next_char(str + self->cur); |
Damien George | 4601759 | 2017-11-16 13:17:51 +1100 | [diff] [blame] | 294 | mp_obj_t o_out = mp_obj_new_str_via_qstr((const char*)cur, end - cur); |
Paul Sokolovsky | 79b7fe2 | 2014-06-14 02:07:25 +0300 | [diff] [blame] | 295 | self->cur += end - cur; |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 296 | return o_out; |
| 297 | } else { |
| 298 | return MP_OBJ_STOP_ITERATION; |
| 299 | } |
| 300 | } |
| 301 | |
Damien George | ae8d867 | 2016-01-09 23:14:54 +0000 | [diff] [blame] | 302 | STATIC mp_obj_t mp_obj_new_str_iterator(mp_obj_t str, mp_obj_iter_buf_t *iter_buf) { |
| 303 | assert(sizeof(mp_obj_str_it_t) <= sizeof(mp_obj_iter_buf_t)); |
| 304 | mp_obj_str_it_t *o = (mp_obj_str_it_t*)iter_buf; |
Damien George | 8212d97 | 2016-01-03 16:27:55 +0000 | [diff] [blame] | 305 | o->base.type = &mp_type_polymorph_iter; |
| 306 | o->iternext = str_it_iternext; |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 307 | o->str = str; |
| 308 | o->cur = 0; |
Damien George | 999cedb | 2015-11-27 17:01:44 +0000 | [diff] [blame] | 309 | return MP_OBJ_FROM_PTR(o); |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 310 | } |
| 311 | |
Paul Sokolovsky | 9731912 | 2014-06-13 22:01:26 +0300 | [diff] [blame] | 312 | #endif // MICROPY_PY_BUILTINS_STR_UNICODE |