/*
 * This file is part of the MicroPython project, http://micropython.org/
 *
 * The MIT License (MIT)
 *
 * Copyright (c) 2013, 2014 Damien P. George
 * Copyright (c) 2014 Paul Sokolovsky
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
| 27 | |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 28 | #include <string.h> |
| 29 | #include <assert.h> |
| 30 | |
Damien George | 51dfcb4 | 2015-01-01 20:27:54 +0000 | [diff] [blame] | 31 | #include "py/nlr.h" |
| 32 | #include "py/objstr.h" |
| 33 | #include "py/objlist.h" |
| 34 | #include "py/runtime0.h" |
| 35 | #include "py/runtime.h" |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 36 | |
Paul Sokolovsky | 9731912 | 2014-06-13 22:01:26 +0300 | [diff] [blame] | 37 | #if MICROPY_PY_BUILTINS_STR_UNICODE |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 38 | |
Damien George | ae8d867 | 2016-01-09 23:14:54 +0000 | [diff] [blame] | 39 | STATIC mp_obj_t mp_obj_new_str_iterator(mp_obj_t str, mp_obj_iter_buf_t *iter_buf); |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 40 | |
| 41 | /******************************************************************************/ |
| 42 | /* str */ |
| 43 | |
Damien George | 7f9d1d6 | 2015-04-09 23:56:15 +0100 | [diff] [blame] | 44 | STATIC void uni_print_quoted(const mp_print_t *print, const byte *str_data, uint str_len) { |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 45 | // this escapes characters, but it will be very slow to print (calling print many times) |
| 46 | bool has_single_quote = false; |
| 47 | bool has_double_quote = false; |
| 48 | for (const byte *s = str_data, *top = str_data + str_len; !has_double_quote && s < top; s++) { |
| 49 | if (*s == '\'') { |
| 50 | has_single_quote = true; |
| 51 | } else if (*s == '"') { |
| 52 | has_double_quote = true; |
| 53 | } |
| 54 | } |
Damien George | 2e2e404 | 2015-03-19 00:21:29 +0000 | [diff] [blame] | 55 | unichar quote_char = '\''; |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 56 | if (has_single_quote && !has_double_quote) { |
| 57 | quote_char = '"'; |
| 58 | } |
Damien George | 7f9d1d6 | 2015-04-09 23:56:15 +0100 | [diff] [blame] | 59 | mp_printf(print, "%c", quote_char); |
Paul Sokolovsky | 00c904b | 2014-06-14 17:48:40 +0300 | [diff] [blame] | 60 | const byte *s = str_data, *top = str_data + str_len; |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 61 | while (s < top) { |
| 62 | unichar ch; |
Paul Sokolovsky | 86d3898 | 2014-06-13 23:00:15 +0300 | [diff] [blame] | 63 | ch = utf8_get_char(s); |
| 64 | s = utf8_next_char(s); |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 65 | if (ch == quote_char) { |
Damien George | 7f9d1d6 | 2015-04-09 23:56:15 +0100 | [diff] [blame] | 66 | mp_printf(print, "\\%c", quote_char); |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 67 | } else if (ch == '\\') { |
Damien George | 7f9d1d6 | 2015-04-09 23:56:15 +0100 | [diff] [blame] | 68 | mp_print_str(print, "\\\\"); |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 69 | } else if (32 <= ch && ch <= 126) { |
Damien George | 7f9d1d6 | 2015-04-09 23:56:15 +0100 | [diff] [blame] | 70 | mp_printf(print, "%c", ch); |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 71 | } else if (ch == '\n') { |
Damien George | 7f9d1d6 | 2015-04-09 23:56:15 +0100 | [diff] [blame] | 72 | mp_print_str(print, "\\n"); |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 73 | } else if (ch == '\r') { |
Damien George | 7f9d1d6 | 2015-04-09 23:56:15 +0100 | [diff] [blame] | 74 | mp_print_str(print, "\\r"); |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 75 | } else if (ch == '\t') { |
Damien George | 7f9d1d6 | 2015-04-09 23:56:15 +0100 | [diff] [blame] | 76 | mp_print_str(print, "\\t"); |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 77 | } else if (ch < 0x100) { |
Damien George | 7f9d1d6 | 2015-04-09 23:56:15 +0100 | [diff] [blame] | 78 | mp_printf(print, "\\x%02x", ch); |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 79 | } else if (ch < 0x10000) { |
Damien George | 7f9d1d6 | 2015-04-09 23:56:15 +0100 | [diff] [blame] | 80 | mp_printf(print, "\\u%04x", ch); |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 81 | } else { |
Damien George | 7f9d1d6 | 2015-04-09 23:56:15 +0100 | [diff] [blame] | 82 | mp_printf(print, "\\U%08x", ch); |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 83 | } |
| 84 | } |
Damien George | 7f9d1d6 | 2015-04-09 23:56:15 +0100 | [diff] [blame] | 85 | mp_printf(print, "%c", quote_char); |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 86 | } |
| 87 | |
Damien George | 7f9d1d6 | 2015-04-09 23:56:15 +0100 | [diff] [blame] | 88 | STATIC void uni_print(const mp_print_t *print, mp_obj_t self_in, mp_print_kind_t kind) { |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 89 | GET_STR_DATA_LEN(self_in, str_data, str_len); |
Damien George | 612045f | 2014-09-17 22:56:34 +0100 | [diff] [blame] | 90 | #if MICROPY_PY_UJSON |
| 91 | if (kind == PRINT_JSON) { |
Damien George | 7f9d1d6 | 2015-04-09 23:56:15 +0100 | [diff] [blame] | 92 | mp_str_print_json(print, str_data, str_len); |
Damien George | 612045f | 2014-09-17 22:56:34 +0100 | [diff] [blame] | 93 | return; |
| 94 | } |
| 95 | #endif |
Paul Sokolovsky | 86d3898 | 2014-06-13 23:00:15 +0300 | [diff] [blame] | 96 | if (kind == PRINT_STR) { |
Damien George | 7f9d1d6 | 2015-04-09 23:56:15 +0100 | [diff] [blame] | 97 | mp_printf(print, "%.*s", str_len, str_data); |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 98 | } else { |
Damien George | 7f9d1d6 | 2015-04-09 23:56:15 +0100 | [diff] [blame] | 99 | uni_print_quoted(print, str_data, str_len); |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 100 | } |
| 101 | } |
| 102 | |
Damien George | 58321dd | 2017-08-29 13:04:01 +1000 | [diff] [blame^] | 103 | STATIC mp_obj_t uni_unary_op(mp_unary_op_t op, mp_obj_t self_in) { |
Paul Sokolovsky | e7f2b4c | 2014-06-13 23:37:18 +0300 | [diff] [blame] | 104 | GET_STR_DATA_LEN(self_in, str_data, str_len); |
| 105 | switch (op) { |
| 106 | case MP_UNARY_OP_BOOL: |
Paul Sokolovsky | 1b586f3 | 2015-10-11 12:09:43 +0300 | [diff] [blame] | 107 | return mp_obj_new_bool(str_len != 0); |
Paul Sokolovsky | e7f2b4c | 2014-06-13 23:37:18 +0300 | [diff] [blame] | 108 | case MP_UNARY_OP_LEN: |
Paul Sokolovsky | 9e215fa | 2014-06-28 23:14:30 +0300 | [diff] [blame] | 109 | return MP_OBJ_NEW_SMALL_INT(unichar_charlen((const char *)str_data, str_len)); |
Paul Sokolovsky | e7f2b4c | 2014-06-13 23:37:18 +0300 | [diff] [blame] | 110 | default: |
| 111 | return MP_OBJ_NULL; // op not supported |
| 112 | } |
| 113 | } |
| 114 | |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 115 | // Convert an index into a pointer to its lead byte. Out of bounds indexing will raise IndexError or |
| 116 | // be capped to the first/last character of the string, depending on is_slice. |
Damien George | 999cedb | 2015-11-27 17:01:44 +0000 | [diff] [blame] | 117 | const byte *str_index_to_ptr(const mp_obj_type_t *type, const byte *self_data, size_t self_len, |
Paul Sokolovsky | ea2c936 | 2014-06-15 00:35:09 +0300 | [diff] [blame] | 118 | mp_obj_t index, bool is_slice) { |
Paul Sokolovsky | 6af90b2 | 2016-07-25 14:43:04 +0300 | [diff] [blame] | 119 | // All str functions also handle bytes objects, and they call str_index_to_ptr(), |
| 120 | // so it must handle bytes. |
| 121 | if (type == &mp_type_bytes) { |
| 122 | // Taken from objstr.c:str_index_to_ptr() |
Damien George | c88cfe1 | 2017-03-23 16:17:40 +1100 | [diff] [blame] | 123 | size_t index_val = mp_get_index(type, self_len, index, is_slice); |
Paul Sokolovsky | 6af90b2 | 2016-07-25 14:43:04 +0300 | [diff] [blame] | 124 | return self_data + index_val; |
| 125 | } |
| 126 | |
Damien George | 40f3c02 | 2014-07-03 13:25:24 +0100 | [diff] [blame] | 127 | mp_int_t i; |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 128 | // Copied from mp_get_index; I don't want bounds checking, just give me |
| 129 | // the integer as-is. (I can't bounds-check without scanning the whole |
| 130 | // string; an out-of-bounds index will be caught in the loops below.) |
| 131 | if (MP_OBJ_IS_SMALL_INT(index)) { |
| 132 | i = MP_OBJ_SMALL_INT_VALUE(index); |
| 133 | } else if (!mp_obj_get_int_maybe(index, &i)) { |
| 134 | nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_TypeError, "string indices must be integers, not %s", mp_obj_get_type_str(index))); |
| 135 | } |
Paul Sokolovsky | ea2c936 | 2014-06-15 00:35:09 +0300 | [diff] [blame] | 136 | const byte *s, *top = self_data + self_len; |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 137 | if (i < 0) |
| 138 | { |
| 139 | // Negative indexing is performed by counting from the end of the string. |
| 140 | for (s = top - 1; i; --s) { |
| 141 | if (s < self_data) { |
| 142 | if (is_slice) { |
| 143 | return self_data; |
| 144 | } |
Damien George | 48d867b | 2017-06-15 11:54:41 +1000 | [diff] [blame] | 145 | mp_raise_msg(&mp_type_IndexError, "string index out of range"); |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 146 | } |
| 147 | if (!UTF8_IS_CONT(*s)) { |
| 148 | ++i; |
| 149 | } |
| 150 | } |
| 151 | ++s; |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 152 | } else { |
| 153 | // Positive indexing, correspondingly, counts from the start of the string. |
| 154 | // It's assumed that negative indexing will generally be used with small |
| 155 | // absolute values (eg str[-1], not str[-1000000]), which means it'll be |
| 156 | // more efficient this way. |
Paul Sokolovsky | ed1c194 | 2016-07-25 19:02:51 +0300 | [diff] [blame] | 157 | s = self_data; |
| 158 | while (1) { |
| 159 | // First check out-of-bounds |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 160 | if (s >= top) { |
| 161 | if (is_slice) { |
| 162 | return top; |
| 163 | } |
Damien George | 48d867b | 2017-06-15 11:54:41 +1000 | [diff] [blame] | 164 | mp_raise_msg(&mp_type_IndexError, "string index out of range"); |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 165 | } |
Paul Sokolovsky | ed1c194 | 2016-07-25 19:02:51 +0300 | [diff] [blame] | 166 | // Then check completion |
| 167 | if (i-- == 0) { |
| 168 | break; |
| 169 | } |
| 170 | // Then skip UTF-8 char |
| 171 | ++s; |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 172 | while (UTF8_IS_CONT(*s)) { |
| 173 | ++s; |
| 174 | } |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 175 | } |
| 176 | } |
| 177 | return s; |
| 178 | } |
| 179 | |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 180 | STATIC mp_obj_t str_subscr(mp_obj_t self_in, mp_obj_t index, mp_obj_t value) { |
| 181 | mp_obj_type_t *type = mp_obj_get_type(self_in); |
Damien George | 0528c5a | 2015-04-04 19:42:03 +0100 | [diff] [blame] | 182 | assert(type == &mp_type_str); |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 183 | GET_STR_DATA_LEN(self_in, self_data, self_len); |
| 184 | if (value == MP_OBJ_SENTINEL) { |
| 185 | // load |
| 186 | #if MICROPY_PY_BUILTINS_SLICE |
| 187 | if (MP_OBJ_IS_TYPE(index, &mp_type_slice)) { |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 188 | mp_obj_t ostart, ostop, ostep; |
| 189 | mp_obj_slice_get(index, &ostart, &ostop, &ostep); |
| 190 | if (ostep != mp_const_none && ostep != MP_OBJ_NEW_SMALL_INT(1)) { |
Javier Candeira | 35a1fea | 2017-08-09 14:40:45 +1000 | [diff] [blame] | 191 | mp_raise_NotImplementedError("only slices with step=1 (aka None) are supported"); |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 192 | } |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 193 | |
Paul Sokolovsky | ea2c936 | 2014-06-15 00:35:09 +0300 | [diff] [blame] | 194 | const byte *pstart, *pstop; |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 195 | if (ostart != mp_const_none) { |
Paul Sokolovsky | ea2c936 | 2014-06-15 00:35:09 +0300 | [diff] [blame] | 196 | pstart = str_index_to_ptr(type, self_data, self_len, ostart, true); |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 197 | } else { |
Paul Sokolovsky | ea2c936 | 2014-06-15 00:35:09 +0300 | [diff] [blame] | 198 | pstart = self_data; |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 199 | } |
| 200 | if (ostop != mp_const_none) { |
| 201 | // pstop will point just after the stop character. This depends on |
| 202 | // the \0 at the end of the string. |
Paul Sokolovsky | ea2c936 | 2014-06-15 00:35:09 +0300 | [diff] [blame] | 203 | pstop = str_index_to_ptr(type, self_data, self_len, ostop, true); |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 204 | } else { |
Paul Sokolovsky | ea2c936 | 2014-06-15 00:35:09 +0300 | [diff] [blame] | 205 | pstop = self_data + self_len; |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 206 | } |
| 207 | if (pstop < pstart) { |
| 208 | return MP_OBJ_NEW_QSTR(MP_QSTR_); |
| 209 | } |
| 210 | return mp_obj_new_str_of_type(type, (const byte *)pstart, pstop - pstart); |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 211 | } |
| 212 | #endif |
Paul Sokolovsky | ea2c936 | 2014-06-15 00:35:09 +0300 | [diff] [blame] | 213 | const byte *s = str_index_to_ptr(type, self_data, self_len, index, false); |
Chris Angelico | 64b468d | 2014-06-04 05:28:12 +1000 | [diff] [blame] | 214 | int len = 1; |
| 215 | if (UTF8_IS_NONASCII(*s)) { |
| 216 | // Count the number of 1 bits (after the first) |
| 217 | for (char mask = 0x40; *s & mask; mask >>= 1) { |
| 218 | ++len; |
| 219 | } |
| 220 | } |
Paul Sokolovsky | ea2c936 | 2014-06-15 00:35:09 +0300 | [diff] [blame] | 221 | return mp_obj_new_str((const char*)s, len, true); // This will create a one-character string |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 222 | } else { |
| 223 | return MP_OBJ_NULL; // op not supported |
| 224 | } |
| 225 | } |
| 226 | |
Damien George | cbf7674 | 2015-11-27 13:38:15 +0000 | [diff] [blame] | 227 | STATIC const mp_rom_map_elem_t struni_locals_dict_table[] = { |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 228 | #if MICROPY_CPYTHON_COMPAT |
Damien George | cbf7674 | 2015-11-27 13:38:15 +0000 | [diff] [blame] | 229 | { MP_ROM_QSTR(MP_QSTR_encode), MP_ROM_PTR(&str_encode_obj) }, |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 230 | #endif |
Damien George | cbf7674 | 2015-11-27 13:38:15 +0000 | [diff] [blame] | 231 | { MP_ROM_QSTR(MP_QSTR_find), MP_ROM_PTR(&str_find_obj) }, |
| 232 | { MP_ROM_QSTR(MP_QSTR_rfind), MP_ROM_PTR(&str_rfind_obj) }, |
| 233 | { MP_ROM_QSTR(MP_QSTR_index), MP_ROM_PTR(&str_index_obj) }, |
| 234 | { MP_ROM_QSTR(MP_QSTR_rindex), MP_ROM_PTR(&str_rindex_obj) }, |
| 235 | { MP_ROM_QSTR(MP_QSTR_join), MP_ROM_PTR(&str_join_obj) }, |
| 236 | { MP_ROM_QSTR(MP_QSTR_split), MP_ROM_PTR(&str_split_obj) }, |
Paul Sokolovsky | ac2f7a7 | 2015-04-04 00:09:23 +0300 | [diff] [blame] | 237 | #if MICROPY_PY_BUILTINS_STR_SPLITLINES |
Damien George | cbf7674 | 2015-11-27 13:38:15 +0000 | [diff] [blame] | 238 | { MP_ROM_QSTR(MP_QSTR_splitlines), MP_ROM_PTR(&str_splitlines_obj) }, |
Paul Sokolovsky | ac2f7a7 | 2015-04-04 00:09:23 +0300 | [diff] [blame] | 239 | #endif |
Damien George | cbf7674 | 2015-11-27 13:38:15 +0000 | [diff] [blame] | 240 | { MP_ROM_QSTR(MP_QSTR_rsplit), MP_ROM_PTR(&str_rsplit_obj) }, |
| 241 | { MP_ROM_QSTR(MP_QSTR_startswith), MP_ROM_PTR(&str_startswith_obj) }, |
| 242 | { MP_ROM_QSTR(MP_QSTR_endswith), MP_ROM_PTR(&str_endswith_obj) }, |
| 243 | { MP_ROM_QSTR(MP_QSTR_strip), MP_ROM_PTR(&str_strip_obj) }, |
| 244 | { MP_ROM_QSTR(MP_QSTR_lstrip), MP_ROM_PTR(&str_lstrip_obj) }, |
| 245 | { MP_ROM_QSTR(MP_QSTR_rstrip), MP_ROM_PTR(&str_rstrip_obj) }, |
| 246 | { MP_ROM_QSTR(MP_QSTR_format), MP_ROM_PTR(&str_format_obj) }, |
| 247 | { MP_ROM_QSTR(MP_QSTR_replace), MP_ROM_PTR(&str_replace_obj) }, |
| 248 | { MP_ROM_QSTR(MP_QSTR_count), MP_ROM_PTR(&str_count_obj) }, |
Paul Sokolovsky | 56eb25f | 2016-08-07 06:46:55 +0300 | [diff] [blame] | 249 | #if MICROPY_PY_BUILTINS_STR_PARTITION |
Damien George | cbf7674 | 2015-11-27 13:38:15 +0000 | [diff] [blame] | 250 | { MP_ROM_QSTR(MP_QSTR_partition), MP_ROM_PTR(&str_partition_obj) }, |
| 251 | { MP_ROM_QSTR(MP_QSTR_rpartition), MP_ROM_PTR(&str_rpartition_obj) }, |
Paul Sokolovsky | 56eb25f | 2016-08-07 06:46:55 +0300 | [diff] [blame] | 252 | #endif |
Paul Sokolovsky | 1563388 | 2016-08-07 15:24:57 +0300 | [diff] [blame] | 253 | #if MICROPY_PY_BUILTINS_STR_CENTER |
Paul Sokolovsky | 1b5abfc | 2016-05-22 00:13:44 +0300 | [diff] [blame] | 254 | { MP_ROM_QSTR(MP_QSTR_center), MP_ROM_PTR(&str_center_obj) }, |
Paul Sokolovsky | 1563388 | 2016-08-07 15:24:57 +0300 | [diff] [blame] | 255 | #endif |
Damien George | cbf7674 | 2015-11-27 13:38:15 +0000 | [diff] [blame] | 256 | { MP_ROM_QSTR(MP_QSTR_lower), MP_ROM_PTR(&str_lower_obj) }, |
| 257 | { MP_ROM_QSTR(MP_QSTR_upper), MP_ROM_PTR(&str_upper_obj) }, |
| 258 | { MP_ROM_QSTR(MP_QSTR_isspace), MP_ROM_PTR(&str_isspace_obj) }, |
| 259 | { MP_ROM_QSTR(MP_QSTR_isalpha), MP_ROM_PTR(&str_isalpha_obj) }, |
| 260 | { MP_ROM_QSTR(MP_QSTR_isdigit), MP_ROM_PTR(&str_isdigit_obj) }, |
| 261 | { MP_ROM_QSTR(MP_QSTR_isupper), MP_ROM_PTR(&str_isupper_obj) }, |
| 262 | { MP_ROM_QSTR(MP_QSTR_islower), MP_ROM_PTR(&str_islower_obj) }, |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 263 | }; |
| 264 | |
Paul Sokolovsky | 6113eb2 | 2015-01-23 02:05:58 +0200 | [diff] [blame] | 265 | STATIC MP_DEFINE_CONST_DICT(struni_locals_dict, struni_locals_dict_table); |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 266 | |
| 267 | const mp_obj_type_t mp_type_str = { |
| 268 | { &mp_type_type }, |
| 269 | .name = MP_QSTR_str, |
Paul Sokolovsky | 86d3898 | 2014-06-13 23:00:15 +0300 | [diff] [blame] | 270 | .print = uni_print, |
Paul Sokolovsky | 344e15b | 2015-01-23 02:15:56 +0200 | [diff] [blame] | 271 | .make_new = mp_obj_str_make_new, |
Paul Sokolovsky | e7f2b4c | 2014-06-13 23:37:18 +0300 | [diff] [blame] | 272 | .unary_op = uni_unary_op, |
Damien George | e04a44e | 2014-06-28 10:27:23 +0100 | [diff] [blame] | 273 | .binary_op = mp_obj_str_binary_op, |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 274 | .subscr = str_subscr, |
| 275 | .getiter = mp_obj_new_str_iterator, |
Damien George | e04a44e | 2014-06-28 10:27:23 +0100 | [diff] [blame] | 276 | .buffer_p = { .get_buffer = mp_obj_str_get_buffer }, |
Damien George | 999cedb | 2015-11-27 17:01:44 +0000 | [diff] [blame] | 277 | .locals_dict = (mp_obj_dict_t*)&struni_locals_dict, |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 278 | }; |
| 279 | |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 280 | /******************************************************************************/ |
| 281 | /* str iterator */ |
| 282 | |
| 283 | typedef struct _mp_obj_str_it_t { |
| 284 | mp_obj_base_t base; |
Damien George | 8212d97 | 2016-01-03 16:27:55 +0000 | [diff] [blame] | 285 | mp_fun_1_t iternext; |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 286 | mp_obj_t str; |
Damien George | c0d9500 | 2017-02-16 16:26:48 +1100 | [diff] [blame] | 287 | size_t cur; |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 288 | } mp_obj_str_it_t; |
| 289 | |
| 290 | STATIC mp_obj_t str_it_iternext(mp_obj_t self_in) { |
Damien George | 999cedb | 2015-11-27 17:01:44 +0000 | [diff] [blame] | 291 | mp_obj_str_it_t *self = MP_OBJ_TO_PTR(self_in); |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 292 | GET_STR_DATA_LEN(self->str, str, len); |
| 293 | if (self->cur < len) { |
Paul Sokolovsky | 79b7fe2 | 2014-06-14 02:07:25 +0300 | [diff] [blame] | 294 | const byte *cur = str + self->cur; |
| 295 | const byte *end = utf8_next_char(str + self->cur); |
| 296 | mp_obj_t o_out = mp_obj_new_str((const char*)cur, end - cur, true); |
| 297 | self->cur += end - cur; |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 298 | return o_out; |
| 299 | } else { |
| 300 | return MP_OBJ_STOP_ITERATION; |
| 301 | } |
| 302 | } |
| 303 | |
Damien George | ae8d867 | 2016-01-09 23:14:54 +0000 | [diff] [blame] | 304 | STATIC mp_obj_t mp_obj_new_str_iterator(mp_obj_t str, mp_obj_iter_buf_t *iter_buf) { |
| 305 | assert(sizeof(mp_obj_str_it_t) <= sizeof(mp_obj_iter_buf_t)); |
| 306 | mp_obj_str_it_t *o = (mp_obj_str_it_t*)iter_buf; |
Damien George | 8212d97 | 2016-01-03 16:27:55 +0000 | [diff] [blame] | 307 | o->base.type = &mp_type_polymorph_iter; |
| 308 | o->iternext = str_it_iternext; |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 309 | o->str = str; |
| 310 | o->cur = 0; |
Damien George | 999cedb | 2015-11-27 17:01:44 +0000 | [diff] [blame] | 311 | return MP_OBJ_FROM_PTR(o); |
Paul Sokolovsky | 8386534 | 2014-06-13 00:51:34 +0300 | [diff] [blame] | 312 | } |
| 313 | |
Paul Sokolovsky | 9731912 | 2014-06-13 22:01:26 +0300 | [diff] [blame] | 314 | #endif // MICROPY_PY_BUILTINS_STR_UNICODE |