blob: 751f93afd7edcafbba16cdac3a86c78dcf44a226 [file] [log] [blame]
xbeefe34222014-03-16 00:14:26 -07001#include <stdbool.h>
Damiend99b0522013-12-21 18:17:45 +00002#include <string.h>
3#include <assert.h>
4
5#include "nlr.h"
6#include "misc.h"
7#include "mpconfig.h"
Damien George55baff42014-01-21 21:40:13 +00008#include "qstr.h"
Damiend99b0522013-12-21 18:17:45 +00009#include "obj.h"
10#include "runtime0.h"
11#include "runtime.h"
12
// Object layout used for str/bytes values that are not qstrs: the GET_STR_*
// macros in this file cast any non-qstr string object to this struct.
13typedef struct _mp_obj_str_t {
14 mp_obj_base_t base;
Damien George5fa93b62014-01-22 14:35:10 +000015 machine_uint_t hash : 16; // XXX here we assume the hash size is 16 bits (it is at the moment; see qstr.c)
16 machine_uint_t len : 16; // len == number of bytes used in data, alloc = len + 1 because (at the moment) we also append a null byte
// data is const; str_make_new/bytes_make_new point it at another object's
// existing buffer instead of copying.
Paul Sokolovsky5972b4c2014-03-20 16:47:44 +020017 const byte *data;
Damiend99b0522013-12-21 18:17:45 +000018} mp_obj_str_t;
19
// Empty bytes object returned by bytes_make_new when it is called with no
// arguments. NOTE(review): declared here without an initializer; presumably
// defined with one elsewhere in the file -- confirm.
Paul Sokolovsky1ecea7c2014-03-21 23:46:59 +020020const mp_obj_t mp_const_empty_bytes;
21
// The GET_STR_* macros below each expand to one or more variable declarations
// followed by an if/else statement.  They must therefore be used at a point
// where a declaration is allowed, and never as the sole body of an unbraced
// if/for/while.  The object argument is evaluated more than once, so it must
// be free of side effects.  It is parenthesised inside the cast so that any
// expression (not only a plain identifier or array element) binds correctly.

// use this macro to extract the string hash
// Declares `uint str_hash`, read from the qstr table for qstr objects or from
// the mp_obj_str_t header otherwise.
#define GET_STR_HASH(str_obj_in, str_hash) \
    uint str_hash; \
    if (MP_OBJ_IS_QSTR(str_obj_in)) { \
        str_hash = qstr_hash(MP_OBJ_QSTR_VALUE(str_obj_in)); \
    } else { \
        str_hash = ((mp_obj_str_t*)(str_obj_in))->hash; \
    }

// use this macro to extract the string length
// Declares `uint str_len` holding the number of bytes in the string.
#define GET_STR_LEN(str_obj_in, str_len) \
    uint str_len; \
    if (MP_OBJ_IS_QSTR(str_obj_in)) { \
        str_len = qstr_len(MP_OBJ_QSTR_VALUE(str_obj_in)); \
    } else { \
        str_len = ((mp_obj_str_t*)(str_obj_in))->len; \
    }

// use this macro to extract the string data and length
// Declares `const byte *str_data` and `uint str_len`.
#define GET_STR_DATA_LEN(str_obj_in, str_data, str_len) \
    const byte *str_data; \
    uint str_len; \
    if (MP_OBJ_IS_QSTR(str_obj_in)) { \
        str_data = qstr_data(MP_OBJ_QSTR_VALUE(str_obj_in), &str_len); \
    } else { \
        str_len = ((mp_obj_str_t*)(str_obj_in))->len; \
        str_data = ((mp_obj_str_t*)(str_obj_in))->data; \
    }
30
// Forward declarations of file-local helpers; their definitions are not in
// this part of the file.  str_new is called by str_make_new and bytes_make_new
// with data == NULL, after which the caller fills in the data pointer and hash.
Paul Sokolovskyd5df6cd2014-02-12 18:15:40 +020031STATIC mp_obj_t mp_obj_new_str_iterator(mp_obj_t str);
32STATIC mp_obj_t mp_obj_new_bytes_iterator(mp_obj_t str);
Paul Sokolovskybe020c22014-03-21 11:39:01 +020033STATIC mp_obj_t str_new(const mp_obj_type_t *type, const byte* data, uint len);
xyb8cfc9f02014-01-05 18:47:51 +080034
35/******************************************************************************/
36/* str */
37
Paul Sokolovsky0b7e29c2014-01-28 03:40:06 +020038void mp_str_print_quoted(void (*print)(void *env, const char *fmt, ...), void *env, const byte *str_data, uint str_len) {
39 // this escapes characters, but it will be very slow to print (calling print many times)
40 bool has_single_quote = false;
41 bool has_double_quote = false;
42 for (const byte *s = str_data, *top = str_data + str_len; (!has_single_quote || !has_double_quote) && s < top; s++) {
43 if (*s == '\'') {
44 has_single_quote = true;
45 } else if (*s == '"') {
46 has_double_quote = true;
47 }
48 }
49 int quote_char = '\'';
50 if (has_single_quote && !has_double_quote) {
51 quote_char = '"';
52 }
53 print(env, "%c", quote_char);
54 for (const byte *s = str_data, *top = str_data + str_len; s < top; s++) {
55 if (*s == quote_char) {
56 print(env, "\\%c", quote_char);
57 } else if (*s == '\\') {
58 print(env, "\\\\");
59 } else if (32 <= *s && *s <= 126) {
60 print(env, "%c", *s);
61 } else if (*s == '\n') {
62 print(env, "\\n");
63 // TODO add more escape codes here if we want to match CPython
64 } else {
65 print(env, "\\x%02x", *s);
66 }
67 }
68 print(env, "%c", quote_char);
69}
70
Paul Sokolovskyd5df6cd2014-02-12 18:15:40 +020071STATIC void str_print(void (*print)(void *env, const char *fmt, ...), void *env, mp_obj_t self_in, mp_print_kind_t kind) {
Damien George5fa93b62014-01-22 14:35:10 +000072 GET_STR_DATA_LEN(self_in, str_data, str_len);
Damien George3e1a5c12014-03-29 13:43:38 +000073 bool is_bytes = MP_OBJ_IS_TYPE(self_in, &mp_type_bytes);
Paul Sokolovsky91fb1c92014-01-24 22:50:40 +020074 if (kind == PRINT_STR && !is_bytes) {
Damien George5fa93b62014-01-22 14:35:10 +000075 print(env, "%.*s", str_len, str_data);
Paul Sokolovsky76d982e2014-01-13 19:19:16 +020076 } else {
Paul Sokolovsky91fb1c92014-01-24 22:50:40 +020077 if (is_bytes) {
78 print(env, "b");
79 }
Paul Sokolovsky0b7e29c2014-01-28 03:40:06 +020080 mp_str_print_quoted(print, env, str_data, str_len);
Paul Sokolovsky76d982e2014-01-13 19:19:16 +020081 }
Damiend99b0522013-12-21 18:17:45 +000082}
83
Paul Sokolovskybe020c22014-03-21 11:39:01 +020084STATIC mp_obj_t str_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const mp_obj_t *args) {
85 switch (n_args) {
86 case 0:
87 return MP_OBJ_NEW_QSTR(MP_QSTR_);
88
89 case 1:
90 {
91 vstr_t *vstr = vstr_new();
92 mp_obj_print_helper((void (*)(void*, const char*, ...))vstr_printf, vstr, args[0], PRINT_STR);
93 mp_obj_t s = mp_obj_new_str((byte*)vstr->buf, vstr->len, false);
94 vstr_free(vstr);
95 return s;
96 }
97
98 case 2:
99 case 3:
100 {
101 // TODO: validate 2nd/3rd args
Damien George3e1a5c12014-03-29 13:43:38 +0000102 if (!MP_OBJ_IS_TYPE(args[0], &mp_type_bytes)) {
Paul Sokolovskybe020c22014-03-21 11:39:01 +0200103 nlr_jump(mp_obj_new_exception_msg(&mp_type_TypeError, "bytes expected"));
104 }
105 GET_STR_DATA_LEN(args[0], str_data, str_len);
106 GET_STR_HASH(args[0], str_hash);
Damien George3e1a5c12014-03-29 13:43:38 +0000107 mp_obj_str_t *o = str_new(&mp_type_str, NULL, str_len);
Paul Sokolovskybe020c22014-03-21 11:39:01 +0200108 o->data = str_data;
109 o->hash = str_hash;
110 return o;
111 }
112
113 default:
114 nlr_jump(mp_obj_new_exception_msg(&mp_type_TypeError, "str takes at most 3 arguments"));
115 }
116}
117
Paul Sokolovsky1ecea7c2014-03-21 23:46:59 +0200118STATIC mp_obj_t bytes_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const mp_obj_t *args) {
119 if (n_args == 0) {
120 return mp_const_empty_bytes;
121 }
122
123 if (MP_OBJ_IS_STR(args[0])) {
124 if (n_args < 2 || n_args > 3) {
125 goto wrong_args;
126 }
127 GET_STR_DATA_LEN(args[0], str_data, str_len);
128 GET_STR_HASH(args[0], str_hash);
Damien George3e1a5c12014-03-29 13:43:38 +0000129 mp_obj_str_t *o = str_new(&mp_type_bytes, NULL, str_len);
Paul Sokolovsky1ecea7c2014-03-21 23:46:59 +0200130 o->data = str_data;
131 o->hash = str_hash;
132 return o;
133 }
134
135 if (n_args > 1) {
136 goto wrong_args;
137 }
138
139 if (MP_OBJ_IS_SMALL_INT(args[0])) {
140 uint len = MP_OBJ_SMALL_INT_VALUE(args[0]);
141 byte *data;
142
Damien George3e1a5c12014-03-29 13:43:38 +0000143 mp_obj_t o = mp_obj_str_builder_start(&mp_type_bytes, len, &data);
Paul Sokolovsky1ecea7c2014-03-21 23:46:59 +0200144 memset(data, 0, len);
145 return mp_obj_str_builder_end(o);
146 }
147
148 int len;
149 byte *data;
150 vstr_t *vstr = NULL;
151 mp_obj_t o = NULL;
152 // Try to create array of exact len if initializer len is known
153 mp_obj_t len_in = mp_obj_len_maybe(args[0]);
154 if (len_in == MP_OBJ_NULL) {
155 len = -1;
156 vstr = vstr_new();
157 } else {
158 len = MP_OBJ_SMALL_INT_VALUE(len_in);
Damien George3e1a5c12014-03-29 13:43:38 +0000159 o = mp_obj_str_builder_start(&mp_type_bytes, len, &data);
Paul Sokolovsky1ecea7c2014-03-21 23:46:59 +0200160 }
161
Damien Georged17926d2014-03-30 13:35:08 +0100162 mp_obj_t iterable = mp_getiter(args[0]);
Paul Sokolovsky1ecea7c2014-03-21 23:46:59 +0200163 mp_obj_t item;
Damien Georged17926d2014-03-30 13:35:08 +0100164 while ((item = mp_iternext(iterable)) != MP_OBJ_NULL) {
Paul Sokolovsky1ecea7c2014-03-21 23:46:59 +0200165 if (len == -1) {
166 vstr_add_char(vstr, MP_OBJ_SMALL_INT_VALUE(item));
167 } else {
168 *data++ = MP_OBJ_SMALL_INT_VALUE(item);
169 }
170 }
171
172 if (len == -1) {
173 vstr_shrink(vstr);
174 // TODO: Optimize, borrow buffer from vstr
175 len = vstr_len(vstr);
Damien George3e1a5c12014-03-29 13:43:38 +0000176 o = mp_obj_str_builder_start(&mp_type_bytes, len, &data);
Paul Sokolovsky1ecea7c2014-03-21 23:46:59 +0200177 memcpy(data, vstr_str(vstr), len);
178 vstr_free(vstr);
179 }
180
181 return mp_obj_str_builder_end(o);
182
183wrong_args:
184 nlr_jump(mp_obj_new_exception_msg(&mp_type_TypeError, "wrong number of arguments"));
185}
186
Damien George55baff42014-01-21 21:40:13 +0000187// like strstr but with specified length and allows \0 bytes
188// TODO replace with something more efficient/standard
xbe17a5a832014-03-23 23:31:58 -0700189STATIC const byte *find_subbytes(const byte *haystack, machine_uint_t hlen, const byte *needle, machine_uint_t nlen, machine_int_t direction) {
Damien George55baff42014-01-21 21:40:13 +0000190 if (hlen >= nlen) {
xbe17a5a832014-03-23 23:31:58 -0700191 machine_uint_t str_index, str_index_end;
192 if (direction > 0) {
193 str_index = 0;
194 str_index_end = hlen - nlen;
195 } else {
196 str_index = hlen - nlen;
197 str_index_end = 0;
198 }
199 for (;;) {
200 if (memcmp(&haystack[str_index], needle, nlen) == 0) {
201 //found
202 return haystack + str_index;
Damien George55baff42014-01-21 21:40:13 +0000203 }
xbe17a5a832014-03-23 23:31:58 -0700204 if (str_index == str_index_end) {
205 //not found
206 break;
Damien George55baff42014-01-21 21:40:13 +0000207 }
xbe17a5a832014-03-23 23:31:58 -0700208 str_index += direction;
Damien George55baff42014-01-21 21:40:13 +0000209 }
210 }
211 return NULL;
212}
213
Paul Sokolovskyd5df6cd2014-02-12 18:15:40 +0200214STATIC mp_obj_t str_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) {
Damien George5fa93b62014-01-22 14:35:10 +0000215 GET_STR_DATA_LEN(lhs_in, lhs_data, lhs_len);
Damiend99b0522013-12-21 18:17:45 +0000216 switch (op) {
Damien Georged17926d2014-03-30 13:35:08 +0100217 case MP_BINARY_OP_SUBSCR:
Paul Sokolovsky31ba60f2014-01-03 02:51:16 +0200218 // TODO: need predicate to check for int-like type (bools are such for example)
219 // ["no", "yes"][1 == 2] is common idiom
220 if (MP_OBJ_IS_SMALL_INT(rhs_in)) {
xbe9e1e8cd2014-03-12 22:57:16 -0700221 uint index = mp_get_index(mp_obj_get_type(lhs_in), lhs_len, rhs_in, false);
Damien George3e1a5c12014-03-29 13:43:38 +0000222 if (MP_OBJ_IS_TYPE(lhs_in, &mp_type_bytes)) {
Damien George7c9c6672014-01-25 00:17:36 +0000223 return MP_OBJ_NEW_SMALL_INT((mp_small_int_t)lhs_data[index]);
Paul Sokolovsky91fb1c92014-01-24 22:50:40 +0200224 } else {
225 return mp_obj_new_str(lhs_data + index, 1, true);
226 }
Paul Sokolovskye606cb62014-01-04 01:34:23 +0200227#if MICROPY_ENABLE_SLICE
Damien George3e1a5c12014-03-29 13:43:38 +0000228 } else if (MP_OBJ_IS_TYPE(rhs_in, &mp_type_slice)) {
Paul Sokolovsky7364af22014-02-02 02:38:22 +0200229 machine_uint_t start, stop;
Paul Sokolovskyea2509d2014-02-02 08:57:05 +0200230 if (!m_seq_get_fast_slice_indexes(lhs_len, rhs_in, &start, &stop)) {
231 assert(0);
232 }
Damien George5fa93b62014-01-22 14:35:10 +0000233 return mp_obj_new_str(lhs_data + start, stop - start, false);
Paul Sokolovskye606cb62014-01-04 01:34:23 +0200234#endif
Paul Sokolovsky31ba60f2014-01-03 02:51:16 +0200235 } else {
Paul Sokolovskyf8b9d3c2014-01-04 01:38:26 +0200236 // Message doesn't match CPython, but we don't have so much bytes as they
237 // to spend them on verbose wording
Damien Georgec5966122014-02-15 16:10:44 +0000238 nlr_jump(mp_obj_new_exception_msg(&mp_type_TypeError, "index must be int"));
Paul Sokolovsky31ba60f2014-01-03 02:51:16 +0200239 }
Damiend99b0522013-12-21 18:17:45 +0000240
Damien Georged17926d2014-03-30 13:35:08 +0100241 case MP_BINARY_OP_ADD:
242 case MP_BINARY_OP_INPLACE_ADD:
Damien George5fa93b62014-01-22 14:35:10 +0000243 if (MP_OBJ_IS_STR(rhs_in)) {
Damiend99b0522013-12-21 18:17:45 +0000244 // add 2 strings
Damien George5fa93b62014-01-22 14:35:10 +0000245
246 GET_STR_DATA_LEN(rhs_in, rhs_data, rhs_len);
Damien George55baff42014-01-21 21:40:13 +0000247 int alloc_len = lhs_len + rhs_len;
Damien George5fa93b62014-01-22 14:35:10 +0000248
249 /* code for making qstr
Damien George55baff42014-01-21 21:40:13 +0000250 byte *q_ptr;
251 byte *val = qstr_build_start(alloc_len, &q_ptr);
252 memcpy(val, lhs_data, lhs_len);
253 memcpy(val + lhs_len, rhs_data, rhs_len);
Damien George5fa93b62014-01-22 14:35:10 +0000254 return MP_OBJ_NEW_QSTR(qstr_build_end(q_ptr));
255 */
256
257 // code for non-qstr
258 byte *data;
Paul Sokolovsky91fb1c92014-01-24 22:50:40 +0200259 mp_obj_t s = mp_obj_str_builder_start(mp_obj_get_type(lhs_in), alloc_len, &data);
Damien George5fa93b62014-01-22 14:35:10 +0000260 memcpy(data, lhs_data, lhs_len);
261 memcpy(data + lhs_len, rhs_data, rhs_len);
262 return mp_obj_str_builder_end(s);
Damiend99b0522013-12-21 18:17:45 +0000263 }
264 break;
Damien George5fa93b62014-01-22 14:35:10 +0000265
Damien Georged17926d2014-03-30 13:35:08 +0100266 case MP_BINARY_OP_IN:
John R. Lentonc1bef212014-01-11 12:39:33 +0000267 /* NOTE `a in b` is `b.__contains__(a)` */
Damien George5fa93b62014-01-22 14:35:10 +0000268 if (MP_OBJ_IS_STR(rhs_in)) {
269 GET_STR_DATA_LEN(rhs_in, rhs_data, rhs_len);
xbe17a5a832014-03-23 23:31:58 -0700270 return MP_BOOL(find_subbytes(lhs_data, lhs_len, rhs_data, rhs_len, 1) != NULL);
John R. Lentonc1bef212014-01-11 12:39:33 +0000271 }
272 break;
Damien George5fa93b62014-01-22 14:35:10 +0000273
Damien Georged17926d2014-03-30 13:35:08 +0100274 case MP_BINARY_OP_MULTIPLY:
Paul Sokolovsky545591a2014-01-21 00:27:33 +0200275 {
276 if (!MP_OBJ_IS_SMALL_INT(rhs_in)) {
277 return NULL;
278 }
279 int n = MP_OBJ_SMALL_INT_VALUE(rhs_in);
Damien George5fa93b62014-01-22 14:35:10 +0000280 byte *data;
Paul Sokolovsky91fb1c92014-01-24 22:50:40 +0200281 mp_obj_t s = mp_obj_str_builder_start(mp_obj_get_type(lhs_in), lhs_len * n, &data);
Damien George5fa93b62014-01-22 14:35:10 +0000282 mp_seq_multiply(lhs_data, sizeof(*lhs_data), lhs_len, n, data);
283 return mp_obj_str_builder_end(s);
Paul Sokolovsky545591a2014-01-21 00:27:33 +0200284 }
Paul Sokolovsky87e85b72014-02-02 08:24:07 +0200285
Damien Georged17926d2014-03-30 13:35:08 +0100286 // These 2 are never passed here, dealt with as a special case in mp_binary_op().
287 //case MP_BINARY_OP_EQUAL:
288 //case MP_BINARY_OP_NOT_EQUAL:
289 case MP_BINARY_OP_LESS:
290 case MP_BINARY_OP_LESS_EQUAL:
291 case MP_BINARY_OP_MORE:
292 case MP_BINARY_OP_MORE_EQUAL:
Paul Sokolovsky87e85b72014-02-02 08:24:07 +0200293 if (MP_OBJ_IS_STR(rhs_in)) {
294 GET_STR_DATA_LEN(rhs_in, rhs_data, rhs_len);
295 return MP_BOOL(mp_seq_cmp_bytes(op, lhs_data, lhs_len, rhs_data, rhs_len));
296 }
Damiend99b0522013-12-21 18:17:45 +0000297 }
298
299 return MP_OBJ_NULL; // op not supported
300}
301
Paul Sokolovskyd5df6cd2014-02-12 18:15:40 +0200302STATIC mp_obj_t str_join(mp_obj_t self_in, mp_obj_t arg) {
Damien George5fa93b62014-01-22 14:35:10 +0000303 assert(MP_OBJ_IS_STR(self_in));
Damiend99b0522013-12-21 18:17:45 +0000304
Damien Georgefe8fb912014-01-02 16:36:09 +0000305 // get separation string
Damien George5fa93b62014-01-22 14:35:10 +0000306 GET_STR_DATA_LEN(self_in, sep_str, sep_len);
Damien Georgefe8fb912014-01-02 16:36:09 +0000307
308 // process args
Damiend99b0522013-12-21 18:17:45 +0000309 uint seq_len;
310 mp_obj_t *seq_items;
Damien George07ddab52014-03-29 13:15:08 +0000311 if (MP_OBJ_IS_TYPE(arg, &mp_type_tuple)) {
Damiend99b0522013-12-21 18:17:45 +0000312 mp_obj_tuple_get(arg, &seq_len, &seq_items);
Damien George3e1a5c12014-03-29 13:43:38 +0000313 } else if (MP_OBJ_IS_TYPE(arg, &mp_type_list)) {
Damiend99b0522013-12-21 18:17:45 +0000314 mp_obj_list_get(arg, &seq_len, &seq_items);
315 } else {
316 goto bad_arg;
317 }
Damien Georgefe8fb912014-01-02 16:36:09 +0000318
319 // count required length
320 int required_len = 0;
Damiend99b0522013-12-21 18:17:45 +0000321 for (int i = 0; i < seq_len; i++) {
Damien George5fa93b62014-01-22 14:35:10 +0000322 if (!MP_OBJ_IS_STR(seq_items[i])) {
Damiend99b0522013-12-21 18:17:45 +0000323 goto bad_arg;
324 }
Damien Georgefe8fb912014-01-02 16:36:09 +0000325 if (i > 0) {
326 required_len += sep_len;
327 }
Damien George5fa93b62014-01-22 14:35:10 +0000328 GET_STR_LEN(seq_items[i], l);
329 required_len += l;
Damiend99b0522013-12-21 18:17:45 +0000330 }
331
332 // make joined string
Damien George5fa93b62014-01-22 14:35:10 +0000333 byte *data;
Paul Sokolovsky91fb1c92014-01-24 22:50:40 +0200334 mp_obj_t joined_str = mp_obj_str_builder_start(mp_obj_get_type(self_in), required_len, &data);
Damiend99b0522013-12-21 18:17:45 +0000335 for (int i = 0; i < seq_len; i++) {
Damiend99b0522013-12-21 18:17:45 +0000336 if (i > 0) {
Damien George5fa93b62014-01-22 14:35:10 +0000337 memcpy(data, sep_str, sep_len);
338 data += sep_len;
Damiend99b0522013-12-21 18:17:45 +0000339 }
Damien George5fa93b62014-01-22 14:35:10 +0000340 GET_STR_DATA_LEN(seq_items[i], s, l);
341 memcpy(data, s, l);
342 data += l;
Damiend99b0522013-12-21 18:17:45 +0000343 }
Damien Georgefe8fb912014-01-02 16:36:09 +0000344
345 // return joined string
Damien George5fa93b62014-01-22 14:35:10 +0000346 return mp_obj_str_builder_end(joined_str);
Damiend99b0522013-12-21 18:17:45 +0000347
348bad_arg:
Damien Georgec5966122014-02-15 16:10:44 +0000349 nlr_jump(mp_obj_new_exception_msg(&mp_type_TypeError, "?str.join expecting a list of str's"));
Damiend99b0522013-12-21 18:17:45 +0000350}
351
Paul Sokolovsky4c316552014-01-21 05:00:21 +0200352#define is_ws(c) ((c) == ' ' || (c) == '\t')
353
Paul Sokolovskyd5df6cd2014-02-12 18:15:40 +0200354STATIC mp_obj_t str_split(uint n_args, const mp_obj_t *args) {
Paul Sokolovsky4c316552014-01-21 05:00:21 +0200355 int splits = -1;
356 mp_obj_t sep = mp_const_none;
357 if (n_args > 1) {
358 sep = args[1];
359 if (n_args > 2) {
360 splits = MP_OBJ_SMALL_INT_VALUE(args[2]);
361 }
362 }
363 assert(sep == mp_const_none);
Damien George12eacca2014-01-21 21:54:15 +0000364 (void)sep; // unused; to hush compiler warning
Paul Sokolovsky4c316552014-01-21 05:00:21 +0200365 mp_obj_t res = mp_obj_new_list(0, NULL);
Damien George5fa93b62014-01-22 14:35:10 +0000366 GET_STR_DATA_LEN(args[0], s, len);
367 const byte *top = s + len;
368 const byte *start;
Paul Sokolovsky4c316552014-01-21 05:00:21 +0200369
370 // Initial whitespace is not counted as split, so we pre-do it
Damien George5fa93b62014-01-22 14:35:10 +0000371 while (s < top && is_ws(*s)) s++;
372 while (s < top && splits != 0) {
Paul Sokolovsky4c316552014-01-21 05:00:21 +0200373 start = s;
Damien George5fa93b62014-01-22 14:35:10 +0000374 while (s < top && !is_ws(*s)) s++;
Damien Georged17926d2014-03-30 13:35:08 +0100375 mp_list_append(res, mp_obj_new_str(start, s - start, false));
Damien George5fa93b62014-01-22 14:35:10 +0000376 if (s >= top) {
Paul Sokolovsky4c316552014-01-21 05:00:21 +0200377 break;
378 }
Damien George5fa93b62014-01-22 14:35:10 +0000379 while (s < top && is_ws(*s)) s++;
Paul Sokolovsky4c316552014-01-21 05:00:21 +0200380 if (splits > 0) {
381 splits--;
382 }
383 }
384
Damien George5fa93b62014-01-22 14:35:10 +0000385 if (s < top) {
Damien Georged17926d2014-03-30 13:35:08 +0100386 mp_list_append(res, mp_obj_new_str(s, top - s, false));
Paul Sokolovsky4c316552014-01-21 05:00:21 +0200387 }
388
389 return res;
390}
391
xbe17a5a832014-03-23 23:31:58 -0700392STATIC mp_obj_t str_finder(uint n_args, const mp_obj_t *args, machine_int_t direction) {
John R. Lentone8204912014-01-12 21:53:52 +0000393 assert(2 <= n_args && n_args <= 4);
Damien George5fa93b62014-01-22 14:35:10 +0000394 assert(MP_OBJ_IS_STR(args[0]));
395 assert(MP_OBJ_IS_STR(args[1]));
John R. Lentone8204912014-01-12 21:53:52 +0000396
Damien George5fa93b62014-01-22 14:35:10 +0000397 GET_STR_DATA_LEN(args[0], haystack, haystack_len);
398 GET_STR_DATA_LEN(args[1], needle, needle_len);
John R. Lentone8204912014-01-12 21:53:52 +0000399
xbec5538882014-03-16 17:58:35 -0700400 machine_uint_t start = 0;
401 machine_uint_t end = haystack_len;
John R. Lentone8204912014-01-12 21:53:52 +0000402 if (n_args >= 3 && args[2] != mp_const_none) {
Damien George3e1a5c12014-03-29 13:43:38 +0000403 start = mp_get_index(&mp_type_str, haystack_len, args[2], true);
John R. Lentone8204912014-01-12 21:53:52 +0000404 }
405 if (n_args >= 4 && args[3] != mp_const_none) {
Damien George3e1a5c12014-03-29 13:43:38 +0000406 end = mp_get_index(&mp_type_str, haystack_len, args[3], true);
John R. Lentone8204912014-01-12 21:53:52 +0000407 }
408
xbe17a5a832014-03-23 23:31:58 -0700409 const byte *p = find_subbytes(haystack + start, end - start, needle, needle_len, direction);
Damien George23005372014-01-13 19:39:01 +0000410 if (p == NULL) {
411 // not found
412 return MP_OBJ_NEW_SMALL_INT(-1);
413 } else {
414 // found
xbe17a5a832014-03-23 23:31:58 -0700415 return MP_OBJ_NEW_SMALL_INT(p - haystack);
John R. Lentone8204912014-01-12 21:53:52 +0000416 }
John R. Lentone8204912014-01-12 21:53:52 +0000417}
418
xbe17a5a832014-03-23 23:31:58 -0700419STATIC mp_obj_t str_find(uint n_args, const mp_obj_t *args) {
420 return str_finder(n_args, args, 1);
421}
422
423STATIC mp_obj_t str_rfind(uint n_args, const mp_obj_t *args) {
424 return str_finder(n_args, args, -1);
425}
426
Paul Sokolovsky1eacefe2014-01-23 01:20:40 +0200427// TODO: (Much) more variety in args
Paul Sokolovskyd5df6cd2014-02-12 18:15:40 +0200428STATIC mp_obj_t str_startswith(mp_obj_t self_in, mp_obj_t arg) {
Paul Sokolovsky1eacefe2014-01-23 01:20:40 +0200429 GET_STR_DATA_LEN(self_in, str, str_len);
430 GET_STR_DATA_LEN(arg, prefix, prefix_len);
431 if (prefix_len > str_len) {
432 return mp_const_false;
433 }
434 return MP_BOOL(memcmp(str, prefix, prefix_len) == 0);
435}
436
Paul Sokolovskyd5df6cd2014-02-12 18:15:40 +0200437STATIC mp_obj_t str_strip(uint n_args, const mp_obj_t *args) {
xbe7b0f39f2014-01-08 14:23:45 -0800438 assert(1 <= n_args && n_args <= 2);
Damien George5fa93b62014-01-22 14:35:10 +0000439 assert(MP_OBJ_IS_STR(args[0]));
440
441 const byte *chars_to_del;
442 uint chars_to_del_len;
443 static const byte whitespace[] = " \t\n\r\v\f";
xbe7b0f39f2014-01-08 14:23:45 -0800444
445 if (n_args == 1) {
446 chars_to_del = whitespace;
Damien George5fa93b62014-01-22 14:35:10 +0000447 chars_to_del_len = sizeof(whitespace);
xbe7b0f39f2014-01-08 14:23:45 -0800448 } else {
Damien George5fa93b62014-01-22 14:35:10 +0000449 assert(MP_OBJ_IS_STR(args[1]));
450 GET_STR_DATA_LEN(args[1], s, l);
451 chars_to_del = s;
452 chars_to_del_len = l;
xbe7b0f39f2014-01-08 14:23:45 -0800453 }
454
Damien George5fa93b62014-01-22 14:35:10 +0000455 GET_STR_DATA_LEN(args[0], orig_str, orig_str_len);
xbe7b0f39f2014-01-08 14:23:45 -0800456
xbec5538882014-03-16 17:58:35 -0700457 machine_uint_t first_good_char_pos = 0;
xbe7b0f39f2014-01-08 14:23:45 -0800458 bool first_good_char_pos_set = false;
xbec5538882014-03-16 17:58:35 -0700459 machine_uint_t last_good_char_pos = 0;
460 for (machine_uint_t i = 0; i < orig_str_len; i++) {
xbe17a5a832014-03-23 23:31:58 -0700461 if (find_subbytes(chars_to_del, chars_to_del_len, &orig_str[i], 1, 1) == NULL) {
xbe7b0f39f2014-01-08 14:23:45 -0800462 last_good_char_pos = i;
463 if (!first_good_char_pos_set) {
464 first_good_char_pos = i;
465 first_good_char_pos_set = true;
466 }
467 }
468 }
469
470 if (first_good_char_pos == 0 && last_good_char_pos == 0) {
Damien George5fa93b62014-01-22 14:35:10 +0000471 // string is all whitespace, return ''
472 return MP_OBJ_NEW_QSTR(MP_QSTR_);
xbe7b0f39f2014-01-08 14:23:45 -0800473 }
474
475 assert(last_good_char_pos >= first_good_char_pos);
476 //+1 to accomodate the last character
xbec5538882014-03-16 17:58:35 -0700477 machine_uint_t stripped_len = last_good_char_pos - first_good_char_pos + 1;
Damien George5fa93b62014-01-22 14:35:10 +0000478 return mp_obj_new_str(orig_str + first_good_char_pos, stripped_len, false);
xbe7b0f39f2014-01-08 14:23:45 -0800479}
480
Damien Georgea11ceca2014-01-19 16:02:09 +0000481mp_obj_t str_format(uint n_args, const mp_obj_t *args) {
Damien George5fa93b62014-01-22 14:35:10 +0000482 assert(MP_OBJ_IS_STR(args[0]));
Damiend99b0522013-12-21 18:17:45 +0000483
Damien George5fa93b62014-01-22 14:35:10 +0000484 GET_STR_DATA_LEN(args[0], str, len);
Damiend99b0522013-12-21 18:17:45 +0000485 int arg_i = 1;
486 vstr_t *vstr = vstr_new();
Damien George5fa93b62014-01-22 14:35:10 +0000487 for (const byte *top = str + len; str < top; str++) {
Damiend99b0522013-12-21 18:17:45 +0000488 if (*str == '{') {
489 str++;
Damien George5fa93b62014-01-22 14:35:10 +0000490 if (str < top && *str == '{') {
Damiend99b0522013-12-21 18:17:45 +0000491 vstr_add_char(vstr, '{');
Paul Sokolovskyf2b796e2014-01-15 22:45:20 +0200492 } else {
Damien George5fa93b62014-01-22 14:35:10 +0000493 while (str < top && *str != '}') str++;
Damiend99b0522013-12-21 18:17:45 +0000494 if (arg_i >= n_args) {
Damien Georgec5966122014-02-15 16:10:44 +0000495 nlr_jump(mp_obj_new_exception_msg(&mp_type_IndexError, "tuple index out of range"));
Damiend99b0522013-12-21 18:17:45 +0000496 }
Paul Sokolovsky76d982e2014-01-13 19:19:16 +0200497 // TODO: may be PRINT_REPR depending on formatting code
Damien George4899ff92014-01-15 22:39:03 +0000498 mp_obj_print_helper((void (*)(void*, const char*, ...))vstr_printf, vstr, args[arg_i], PRINT_STR);
Damiend99b0522013-12-21 18:17:45 +0000499 arg_i++;
500 }
501 } else {
502 vstr_add_char(vstr, *str);
503 }
504 }
505
Damien George5fa93b62014-01-22 14:35:10 +0000506 mp_obj_t s = mp_obj_new_str((byte*)vstr->buf, vstr->len, false);
507 vstr_free(vstr);
508 return s;
Damiend99b0522013-12-21 18:17:45 +0000509}
510
Paul Sokolovskyd5df6cd2014-02-12 18:15:40 +0200511STATIC mp_obj_t str_replace(uint n_args, const mp_obj_t *args) {
xbe480c15a2014-01-30 22:17:30 -0800512 assert(MP_OBJ_IS_STR(args[0]));
513 assert(MP_OBJ_IS_STR(args[1]));
514 assert(MP_OBJ_IS_STR(args[2]));
515
Damien George94f68302014-01-31 23:45:12 +0000516 machine_int_t max_rep = 0;
xbe480c15a2014-01-30 22:17:30 -0800517 if (n_args == 4) {
Paul Sokolovsky4e246082014-02-11 15:29:55 +0200518 assert(MP_OBJ_IS_SMALL_INT(args[3]));
519 max_rep = MP_OBJ_SMALL_INT_VALUE(args[3]);
520 if (max_rep == 0) {
521 return args[0];
522 } else if (max_rep < 0) {
523 max_rep = 0;
524 }
xbe480c15a2014-01-30 22:17:30 -0800525 }
Damien George94f68302014-01-31 23:45:12 +0000526
527 // if max_rep is still 0 by this point we will need to do all possible replacements
xbe480c15a2014-01-30 22:17:30 -0800528
529 GET_STR_DATA_LEN(args[0], str, str_len);
530 GET_STR_DATA_LEN(args[1], old, old_len);
531 GET_STR_DATA_LEN(args[2], new, new_len);
Damien George94f68302014-01-31 23:45:12 +0000532
533 // old won't exist in str if it's longer, so nothing to replace
xbe480c15a2014-01-30 22:17:30 -0800534 if (old_len > str_len) {
Paul Sokolovsky4e246082014-02-11 15:29:55 +0200535 return args[0];
xbe480c15a2014-01-30 22:17:30 -0800536 }
537
Damien George94f68302014-01-31 23:45:12 +0000538 // data for the replaced string
539 byte *data = NULL;
540 mp_obj_t replaced_str = MP_OBJ_NULL;
xbe480c15a2014-01-30 22:17:30 -0800541
Damien George94f68302014-01-31 23:45:12 +0000542 // do 2 passes over the string:
543 // first pass computes the required length of the replaced string
544 // second pass does the replacements
545 for (;;) {
546 machine_uint_t replaced_str_index = 0;
547 machine_uint_t num_replacements_done = 0;
548 const byte *old_occurrence;
549 const byte *offset_ptr = str;
550 machine_uint_t offset_num = 0;
xbe17a5a832014-03-23 23:31:58 -0700551 while ((old_occurrence = find_subbytes(offset_ptr, str_len - offset_num, old, old_len, 1)) != NULL) {
Damien George94f68302014-01-31 23:45:12 +0000552 // copy from just after end of last occurrence of to-be-replaced string to right before start of next occurrence
553 if (data != NULL) {
554 memcpy(data + replaced_str_index, offset_ptr, old_occurrence - offset_ptr);
555 }
556 replaced_str_index += old_occurrence - offset_ptr;
557 // copy the replacement string
558 if (data != NULL) {
559 memcpy(data + replaced_str_index, new, new_len);
560 }
561 replaced_str_index += new_len;
562 offset_ptr = old_occurrence + old_len;
563 offset_num = offset_ptr - str;
xbe480c15a2014-01-30 22:17:30 -0800564
Damien George94f68302014-01-31 23:45:12 +0000565 num_replacements_done++;
566 if (max_rep != 0 && num_replacements_done == max_rep){
567 break;
568 }
569 }
570
571 // copy from just after end of last occurrence of to-be-replaced string to end of old string
572 if (data != NULL) {
573 memcpy(data + replaced_str_index, offset_ptr, str_len - offset_num);
574 }
575 replaced_str_index += str_len - offset_num;
576
577 if (data == NULL) {
578 // first pass
579 if (num_replacements_done == 0) {
580 // no substr found, return original string
581 return args[0];
582 } else {
583 // substr found, allocate new string
584 replaced_str = mp_obj_str_builder_start(mp_obj_get_type(args[0]), replaced_str_index, &data);
585 }
586 } else {
587 // second pass, we are done
588 break;
589 }
xbe480c15a2014-01-30 22:17:30 -0800590 }
Damien George94f68302014-01-31 23:45:12 +0000591
xbe480c15a2014-01-30 22:17:30 -0800592 return mp_obj_str_builder_end(replaced_str);
593}
594
xbe9e1e8cd2014-03-12 22:57:16 -0700595STATIC mp_obj_t str_count(uint n_args, const mp_obj_t *args) {
596 assert(2 <= n_args && n_args <= 4);
597 assert(MP_OBJ_IS_STR(args[0]));
598 assert(MP_OBJ_IS_STR(args[1]));
599
600 GET_STR_DATA_LEN(args[0], haystack, haystack_len);
601 GET_STR_DATA_LEN(args[1], needle, needle_len);
602
Damien George536dde22014-03-13 22:07:55 +0000603 machine_uint_t start = 0;
604 machine_uint_t end = haystack_len;
xbe9e1e8cd2014-03-12 22:57:16 -0700605 if (n_args >= 3 && args[2] != mp_const_none) {
Damien George3e1a5c12014-03-29 13:43:38 +0000606 start = mp_get_index(&mp_type_str, haystack_len, args[2], true);
xbe9e1e8cd2014-03-12 22:57:16 -0700607 }
608 if (n_args >= 4 && args[3] != mp_const_none) {
Damien George3e1a5c12014-03-29 13:43:38 +0000609 end = mp_get_index(&mp_type_str, haystack_len, args[3], true);
xbe9e1e8cd2014-03-12 22:57:16 -0700610 }
611
Damien George536dde22014-03-13 22:07:55 +0000612 // if needle_len is zero then we count each gap between characters as an occurrence
613 if (needle_len == 0) {
614 return MP_OBJ_NEW_SMALL_INT(end - start + 1);
xbe9e1e8cd2014-03-12 22:57:16 -0700615 }
616
Damien George536dde22014-03-13 22:07:55 +0000617 // count the occurrences
618 machine_int_t num_occurrences = 0;
xbec5d70ba2014-03-13 00:29:15 -0700619 for (machine_uint_t haystack_index = start; haystack_index + needle_len <= end; haystack_index++) {
620 if (memcmp(&haystack[haystack_index], needle, needle_len) == 0) {
621 num_occurrences++;
622 haystack_index += needle_len - 1;
623 }
xbe9e1e8cd2014-03-12 22:57:16 -0700624 }
625
626 return MP_OBJ_NEW_SMALL_INT(num_occurrences);
627}
628
Damien Georgeb035db32014-03-21 20:39:40 +0000629STATIC mp_obj_t str_partitioner(mp_obj_t self_in, mp_obj_t arg, machine_int_t direction) {
xbe613a8e32014-03-18 00:06:29 -0700630 assert(MP_OBJ_IS_STR(self_in));
631 if (!MP_OBJ_IS_STR(arg)) {
632 nlr_jump(mp_obj_new_exception_msg_varg(&mp_type_TypeError,
633 "Can't convert '%s' object to str implicitly", mp_obj_get_type_str(arg)));
634 }
Damien Georgeb035db32014-03-21 20:39:40 +0000635
xbe613a8e32014-03-18 00:06:29 -0700636 GET_STR_DATA_LEN(self_in, str, str_len);
637 GET_STR_DATA_LEN(arg, sep, sep_len);
638
639 if (sep_len == 0) {
640 nlr_jump(mp_obj_new_exception_msg(&mp_type_ValueError, "empty separator"));
641 }
Damien Georgeb035db32014-03-21 20:39:40 +0000642
643 mp_obj_t result[] = {MP_OBJ_NEW_QSTR(MP_QSTR_), MP_OBJ_NEW_QSTR(MP_QSTR_), MP_OBJ_NEW_QSTR(MP_QSTR_)};
644
645 if (direction > 0) {
646 result[0] = self_in;
xbe0a6894c2014-03-21 01:12:26 -0700647 } else {
Damien Georgeb035db32014-03-21 20:39:40 +0000648 result[2] = self_in;
xbe0a6894c2014-03-21 01:12:26 -0700649 }
xbe613a8e32014-03-18 00:06:29 -0700650
xbe17a5a832014-03-23 23:31:58 -0700651 const byte *position_ptr = find_subbytes(str, str_len, sep, sep_len, direction);
652 if (position_ptr != NULL) {
653 machine_uint_t position = position_ptr - str;
654 result[0] = mp_obj_new_str(str, position, false);
655 result[1] = arg;
656 result[2] = mp_obj_new_str(str + position + sep_len, str_len - position - sep_len, false);
xbe613a8e32014-03-18 00:06:29 -0700657 }
Damien Georgeb035db32014-03-21 20:39:40 +0000658
xbe0a6894c2014-03-21 01:12:26 -0700659 return mp_obj_new_tuple(3, result);
xbe613a8e32014-03-18 00:06:29 -0700660}
661
Damien Georgeb035db32014-03-21 20:39:40 +0000662STATIC mp_obj_t str_partition(mp_obj_t self_in, mp_obj_t arg) {
663 return str_partitioner(self_in, arg, 1);
xbe0a6894c2014-03-21 01:12:26 -0700664}
xbe4504ea82014-03-19 00:46:14 -0700665
Damien Georgeb035db32014-03-21 20:39:40 +0000666STATIC mp_obj_t str_rpartition(mp_obj_t self_in, mp_obj_t arg) {
667 return str_partitioner(self_in, arg, -1);
xbe4504ea82014-03-19 00:46:14 -0700668}
669
Damien George2da98302014-03-09 19:58:18 +0000670STATIC machine_int_t str_get_buffer(mp_obj_t self_in, buffer_info_t *bufinfo, int flags) {
671 if (flags == BUFFER_READ) {
672 GET_STR_DATA_LEN(self_in, str_data, str_len);
673 bufinfo->buf = (void*)str_data;
674 bufinfo->len = str_len;
675 return 0;
676 } else {
677 // can't write to a string
678 bufinfo->buf = NULL;
679 bufinfo->len = 0;
680 return 1;
681 }
682}
683
Paul Sokolovskyd5df6cd2014-02-12 18:15:40 +0200684STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_find_obj, 2, 4, str_find);
xbe17a5a832014-03-23 23:31:58 -0700685STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_rfind_obj, 2, 4, str_rfind);
Paul Sokolovskyd5df6cd2014-02-12 18:15:40 +0200686STATIC MP_DEFINE_CONST_FUN_OBJ_2(str_join_obj, str_join);
687STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_split_obj, 1, 3, str_split);
688STATIC MP_DEFINE_CONST_FUN_OBJ_2(str_startswith_obj, str_startswith);
689STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_strip_obj, 1, 2, str_strip);
690STATIC MP_DEFINE_CONST_FUN_OBJ_VAR(str_format_obj, 1, str_format);
691STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_replace_obj, 3, 4, str_replace);
xbe9e1e8cd2014-03-12 22:57:16 -0700692STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_count_obj, 2, 4, str_count);
xbe613a8e32014-03-18 00:06:29 -0700693STATIC MP_DEFINE_CONST_FUN_OBJ_2(str_partition_obj, str_partition);
xbe4504ea82014-03-19 00:46:14 -0700694STATIC MP_DEFINE_CONST_FUN_OBJ_2(str_rpartition_obj, str_rpartition);
Damiend99b0522013-12-21 18:17:45 +0000695
Damien George9b196cd2014-03-26 21:47:19 +0000696STATIC const mp_map_elem_t str_locals_dict_table[] = {
697 { MP_OBJ_NEW_QSTR(MP_QSTR_find), (mp_obj_t)&str_find_obj },
698 { MP_OBJ_NEW_QSTR(MP_QSTR_rfind), (mp_obj_t)&str_rfind_obj },
699 { MP_OBJ_NEW_QSTR(MP_QSTR_join), (mp_obj_t)&str_join_obj },
700 { MP_OBJ_NEW_QSTR(MP_QSTR_split), (mp_obj_t)&str_split_obj },
701 { MP_OBJ_NEW_QSTR(MP_QSTR_startswith), (mp_obj_t)&str_startswith_obj },
702 { MP_OBJ_NEW_QSTR(MP_QSTR_strip), (mp_obj_t)&str_strip_obj },
703 { MP_OBJ_NEW_QSTR(MP_QSTR_format), (mp_obj_t)&str_format_obj },
704 { MP_OBJ_NEW_QSTR(MP_QSTR_replace), (mp_obj_t)&str_replace_obj },
705 { MP_OBJ_NEW_QSTR(MP_QSTR_count), (mp_obj_t)&str_count_obj },
706 { MP_OBJ_NEW_QSTR(MP_QSTR_partition), (mp_obj_t)&str_partition_obj },
707 { MP_OBJ_NEW_QSTR(MP_QSTR_rpartition), (mp_obj_t)&str_rpartition_obj },
ian-v7a16fad2014-01-06 09:52:29 -0800708};
Damien George97209d32014-01-07 15:58:30 +0000709
Damien George9b196cd2014-03-26 21:47:19 +0000710STATIC MP_DEFINE_CONST_DICT(str_locals_dict, str_locals_dict_table);
711
Damien George3e1a5c12014-03-29 13:43:38 +0000712const mp_obj_type_t mp_type_str = {
Damien Georgec5966122014-02-15 16:10:44 +0000713 { &mp_type_type },
Damien Georgea71c83a2014-02-15 11:34:50 +0000714 .name = MP_QSTR_str,
Paul Sokolovsky860ffb02014-01-05 22:34:09 +0200715 .print = str_print,
Paul Sokolovskybe020c22014-03-21 11:39:01 +0200716 .make_new = str_make_new,
Paul Sokolovsky860ffb02014-01-05 22:34:09 +0200717 .binary_op = str_binary_op,
Paul Sokolovsky91fb1c92014-01-24 22:50:40 +0200718 .getiter = mp_obj_new_str_iterator,
Damien George2da98302014-03-09 19:58:18 +0000719 .buffer_p = { .get_buffer = str_get_buffer },
Damien George9b196cd2014-03-26 21:47:19 +0000720 .locals_dict = (mp_obj_t)&str_locals_dict,
Damiend99b0522013-12-21 18:17:45 +0000721};
722
Paul Sokolovsky91fb1c92014-01-24 22:50:40 +0200723// Reuses most of methods from str
Damien George3e1a5c12014-03-29 13:43:38 +0000724const mp_obj_type_t mp_type_bytes = {
Damien Georgec5966122014-02-15 16:10:44 +0000725 { &mp_type_type },
Damien Georgea71c83a2014-02-15 11:34:50 +0000726 .name = MP_QSTR_bytes,
Paul Sokolovsky91fb1c92014-01-24 22:50:40 +0200727 .print = str_print,
Paul Sokolovsky1ecea7c2014-03-21 23:46:59 +0200728 .make_new = bytes_make_new,
Paul Sokolovsky91fb1c92014-01-24 22:50:40 +0200729 .binary_op = str_binary_op,
730 .getiter = mp_obj_new_bytes_iterator,
Damien George9b196cd2014-03-26 21:47:19 +0000731 .locals_dict = (mp_obj_t)&str_locals_dict,
Paul Sokolovsky91fb1c92014-01-24 22:50:40 +0200732};
733
Paul Sokolovsky1ecea7c2014-03-21 23:46:59 +0200734// the zero-length bytes
Damien George3e1a5c12014-03-29 13:43:38 +0000735STATIC const mp_obj_str_t empty_bytes_obj = {{&mp_type_bytes}, 0, 0, NULL};
Paul Sokolovsky1ecea7c2014-03-21 23:46:59 +0200736const mp_obj_t mp_const_empty_bytes = (mp_obj_t)&empty_bytes_obj;
737
Paul Sokolovsky91fb1c92014-01-24 22:50:40 +0200738mp_obj_t mp_obj_str_builder_start(const mp_obj_type_t *type, uint len, byte **data) {
Paul Sokolovsky5972b4c2014-03-20 16:47:44 +0200739 mp_obj_str_t *o = m_new_obj(mp_obj_str_t);
Paul Sokolovsky91fb1c92014-01-24 22:50:40 +0200740 o->base.type = type;
Damien George5fa93b62014-01-22 14:35:10 +0000741 o->len = len;
Paul Sokolovsky5972b4c2014-03-20 16:47:44 +0200742 byte *p = m_new(byte, len + 1);
743 o->data = p;
744 *data = p;
Damiend99b0522013-12-21 18:17:45 +0000745 return o;
746}
747
Damien George5fa93b62014-01-22 14:35:10 +0000748mp_obj_t mp_obj_str_builder_end(mp_obj_t o_in) {
Damien George5fa93b62014-01-22 14:35:10 +0000749 mp_obj_str_t *o = o_in;
750 o->hash = qstr_compute_hash(o->data, o->len);
Paul Sokolovsky5972b4c2014-03-20 16:47:44 +0200751 byte *p = (byte*)o->data;
752 p[o->len] = '\0'; // for now we add null for compatibility with C ASCIIZ strings
Damien George5fa93b62014-01-22 14:35:10 +0000753 return o;
754}
755
Paul Sokolovskyd5df6cd2014-02-12 18:15:40 +0200756STATIC mp_obj_t str_new(const mp_obj_type_t *type, const byte* data, uint len) {
Paul Sokolovsky5972b4c2014-03-20 16:47:44 +0200757 mp_obj_str_t *o = m_new_obj(mp_obj_str_t);
Paul Sokolovsky91fb1c92014-01-24 22:50:40 +0200758 o->base.type = type;
Paul Sokolovsky91fb1c92014-01-24 22:50:40 +0200759 o->len = len;
Paul Sokolovsky5972b4c2014-03-20 16:47:44 +0200760 if (data) {
761 o->hash = qstr_compute_hash(data, len);
762 byte *p = m_new(byte, len + 1);
763 o->data = p;
764 memcpy(p, data, len * sizeof(byte));
765 p[len] = '\0'; // for now we add null for compatibility with C ASCIIZ strings
766 }
Paul Sokolovsky91fb1c92014-01-24 22:50:40 +0200767 return o;
768}
769
Damien George5fa93b62014-01-22 14:35:10 +0000770mp_obj_t mp_obj_new_str(const byte* data, uint len, bool make_qstr_if_not_already) {
771 qstr q = qstr_find_strn(data, len);
772 if (q != MP_QSTR_NULL) {
773 // qstr with this data already exists
774 return MP_OBJ_NEW_QSTR(q);
775 } else if (make_qstr_if_not_already) {
776 // no existing qstr, make a new one
777 return MP_OBJ_NEW_QSTR(qstr_from_strn((const char*)data, len));
778 } else {
779 // no existing qstr, don't make one
Damien George3e1a5c12014-03-29 13:43:38 +0000780 return str_new(&mp_type_str, data, len);
Paul Sokolovsky8965a5e2014-01-20 23:33:19 +0200781 }
Damien George5fa93b62014-01-22 14:35:10 +0000782}
783
Paul Sokolovsky91fb1c92014-01-24 22:50:40 +0200784mp_obj_t mp_obj_new_bytes(const byte* data, uint len) {
Damien George3e1a5c12014-03-29 13:43:38 +0000785 return str_new(&mp_type_bytes, data, len);
Paul Sokolovsky91fb1c92014-01-24 22:50:40 +0200786}
787
Damien George5fa93b62014-01-22 14:35:10 +0000788bool mp_obj_str_equal(mp_obj_t s1, mp_obj_t s2) {
789 if (MP_OBJ_IS_QSTR(s1) && MP_OBJ_IS_QSTR(s2)) {
790 return s1 == s2;
791 } else {
792 GET_STR_HASH(s1, h1);
793 GET_STR_HASH(s2, h2);
794 if (h1 != h2) {
795 return false;
796 }
797 GET_STR_DATA_LEN(s1, d1, l1);
798 GET_STR_DATA_LEN(s2, d2, l2);
799 if (l1 != l2) {
800 return false;
801 }
Damien George1e708fe2014-01-23 18:27:51 +0000802 return memcmp(d1, d2, l1) == 0;
Paul Sokolovsky8965a5e2014-01-20 23:33:19 +0200803 }
Damien George5fa93b62014-01-22 14:35:10 +0000804}
805
Damien Georgeb829b5c2014-01-25 13:51:19 +0000806void bad_implicit_conversion(mp_obj_t self_in) __attribute__((noreturn));
807void bad_implicit_conversion(mp_obj_t self_in) {
Damien Georgec5966122014-02-15 16:10:44 +0000808 nlr_jump(mp_obj_new_exception_msg_varg(&mp_type_TypeError, "Can't convert '%s' object to str implicitly", mp_obj_get_type_str(self_in)));
Damien Georgeb829b5c2014-01-25 13:51:19 +0000809}
810
Damien George5fa93b62014-01-22 14:35:10 +0000811uint mp_obj_str_get_hash(mp_obj_t self_in) {
812 if (MP_OBJ_IS_STR(self_in)) {
813 GET_STR_HASH(self_in, h);
814 return h;
815 } else {
Damien Georgeb829b5c2014-01-25 13:51:19 +0000816 bad_implicit_conversion(self_in);
Damien George5fa93b62014-01-22 14:35:10 +0000817 }
818}
819
820uint mp_obj_str_get_len(mp_obj_t self_in) {
821 if (MP_OBJ_IS_STR(self_in)) {
822 GET_STR_LEN(self_in, l);
823 return l;
824 } else {
Damien Georgeb829b5c2014-01-25 13:51:19 +0000825 bad_implicit_conversion(self_in);
826 }
827}
828
829// use this if you will anyway convert the string to a qstr
830// will be more efficient for the case where it's already a qstr
831qstr mp_obj_str_get_qstr(mp_obj_t self_in) {
832 if (MP_OBJ_IS_QSTR(self_in)) {
833 return MP_OBJ_QSTR_VALUE(self_in);
Damien George3e1a5c12014-03-29 13:43:38 +0000834 } else if (MP_OBJ_IS_TYPE(self_in, &mp_type_str)) {
Damien Georgeb829b5c2014-01-25 13:51:19 +0000835 mp_obj_str_t *self = self_in;
836 return qstr_from_strn((char*)self->data, self->len);
837 } else {
838 bad_implicit_conversion(self_in);
Damien George5fa93b62014-01-22 14:35:10 +0000839 }
840}
841
842// only use this function if you need the str data to be zero terminated
843// at the moment all strings are zero terminated to help with C ASCIIZ compatibility
844const char *mp_obj_str_get_str(mp_obj_t self_in) {
845 if (MP_OBJ_IS_STR(self_in)) {
846 GET_STR_DATA_LEN(self_in, s, l);
847 (void)l; // len unused
848 return (const char*)s;
849 } else {
Damien Georgeb829b5c2014-01-25 13:51:19 +0000850 bad_implicit_conversion(self_in);
Damien George5fa93b62014-01-22 14:35:10 +0000851 }
852}
853
Damien George698ec212014-02-08 18:17:23 +0000854const char *mp_obj_str_get_data(mp_obj_t self_in, uint *len) {
Damien George5fa93b62014-01-22 14:35:10 +0000855 if (MP_OBJ_IS_STR(self_in)) {
856 GET_STR_DATA_LEN(self_in, s, l);
857 *len = l;
Damien George698ec212014-02-08 18:17:23 +0000858 return (const char*)s;
Damien George5fa93b62014-01-22 14:35:10 +0000859 } else {
Damien Georgeb829b5c2014-01-25 13:51:19 +0000860 bad_implicit_conversion(self_in);
Damien George5fa93b62014-01-22 14:35:10 +0000861 }
Damiend99b0522013-12-21 18:17:45 +0000862}
xyb8cfc9f02014-01-05 18:47:51 +0800863
864/******************************************************************************/
865/* str iterator */
866
867typedef struct _mp_obj_str_it_t {
868 mp_obj_base_t base;
Damien George5fa93b62014-01-22 14:35:10 +0000869 mp_obj_t str;
xyb8cfc9f02014-01-05 18:47:51 +0800870 machine_uint_t cur;
871} mp_obj_str_it_t;
872
Paul Sokolovskyd5df6cd2014-02-12 18:15:40 +0200873STATIC mp_obj_t str_it_iternext(mp_obj_t self_in) {
xyb8cfc9f02014-01-05 18:47:51 +0800874 mp_obj_str_it_t *self = self_in;
Damien George5fa93b62014-01-22 14:35:10 +0000875 GET_STR_DATA_LEN(self->str, str, len);
876 if (self->cur < len) {
877 mp_obj_t o_out = mp_obj_new_str(str + self->cur, 1, true);
xyb8cfc9f02014-01-05 18:47:51 +0800878 self->cur += 1;
879 return o_out;
880 } else {
Damien George66eaf842014-03-26 19:27:58 +0000881 return MP_OBJ_NULL;
xyb8cfc9f02014-01-05 18:47:51 +0800882 }
883}
884
Damien George3e1a5c12014-03-29 13:43:38 +0000885STATIC const mp_obj_type_t mp_type_str_it = {
Damien Georgec5966122014-02-15 16:10:44 +0000886 { &mp_type_type },
Damien Georgea71c83a2014-02-15 11:34:50 +0000887 .name = MP_QSTR_iterator,
Paul Sokolovskyf7eaf602014-03-30 22:00:12 +0300888 .getiter = mp_identity,
Paul Sokolovsky860ffb02014-01-05 22:34:09 +0200889 .iternext = str_it_iternext,
xyb8cfc9f02014-01-05 18:47:51 +0800890};
891
Paul Sokolovskyd5df6cd2014-02-12 18:15:40 +0200892STATIC mp_obj_t bytes_it_iternext(mp_obj_t self_in) {
Paul Sokolovsky91fb1c92014-01-24 22:50:40 +0200893 mp_obj_str_it_t *self = self_in;
894 GET_STR_DATA_LEN(self->str, str, len);
895 if (self->cur < len) {
Damien George7c9c6672014-01-25 00:17:36 +0000896 mp_obj_t o_out = MP_OBJ_NEW_SMALL_INT((mp_small_int_t)str[self->cur]);
Paul Sokolovsky91fb1c92014-01-24 22:50:40 +0200897 self->cur += 1;
898 return o_out;
899 } else {
Damien George66eaf842014-03-26 19:27:58 +0000900 return MP_OBJ_NULL;
Paul Sokolovsky91fb1c92014-01-24 22:50:40 +0200901 }
902}
903
Damien George3e1a5c12014-03-29 13:43:38 +0000904STATIC const mp_obj_type_t mp_type_bytes_it = {
Damien Georgec5966122014-02-15 16:10:44 +0000905 { &mp_type_type },
Damien Georgea71c83a2014-02-15 11:34:50 +0000906 .name = MP_QSTR_iterator,
Paul Sokolovskyf7eaf602014-03-30 22:00:12 +0300907 .getiter = mp_identity,
Paul Sokolovsky91fb1c92014-01-24 22:50:40 +0200908 .iternext = bytes_it_iternext,
909};
910
911mp_obj_t mp_obj_new_str_iterator(mp_obj_t str) {
xyb8cfc9f02014-01-05 18:47:51 +0800912 mp_obj_str_it_t *o = m_new_obj(mp_obj_str_it_t);
Damien George3e1a5c12014-03-29 13:43:38 +0000913 o->base.type = &mp_type_str_it;
xyb8cfc9f02014-01-05 18:47:51 +0800914 o->str = str;
Paul Sokolovsky91fb1c92014-01-24 22:50:40 +0200915 o->cur = 0;
916 return o;
917}
918
919mp_obj_t mp_obj_new_bytes_iterator(mp_obj_t str) {
920 mp_obj_str_it_t *o = m_new_obj(mp_obj_str_it_t);
Damien George3e1a5c12014-03-29 13:43:38 +0000921 o->base.type = &mp_type_bytes_it;
Paul Sokolovsky91fb1c92014-01-24 22:50:40 +0200922 o->str = str;
923 o->cur = 0;
xyb8cfc9f02014-01-05 18:47:51 +0800924 return o;
925}