blob: 758e8c2938bb512c2da29bd99e43003988a276aa [file] [log] [blame]
Damiend99b0522013-12-21 18:17:45 +00001#include <stdlib.h>
2#include <stdint.h>
3#include <stdarg.h>
4#include <string.h>
5#include <assert.h>
6
7#include "nlr.h"
8#include "misc.h"
9#include "mpconfig.h"
Damien Georgeeb7bfcb2014-01-04 15:57:35 +000010#include "mpqstr.h"
Damiend99b0522013-12-21 18:17:45 +000011#include "obj.h"
12#include "runtime0.h"
13#include "runtime.h"
14
15typedef struct _mp_obj_str_t {
16 mp_obj_base_t base;
17 qstr qstr;
18} mp_obj_str_t;
19
xyb8cfc9f02014-01-05 18:47:51 +080020static mp_obj_t mp_obj_new_str_iterator(mp_obj_str_t *str, int cur);
21
22/******************************************************************************/
23/* str */
24
Paul Sokolovskybb33cc62014-01-20 00:59:25 +020025void mp_obj_str_print_qstr(void (*print)(void *env, const char *fmt, ...), void *env, qstr q, mp_print_kind_t kind) {
Paul Sokolovsky76d982e2014-01-13 19:19:16 +020026 if (kind == PRINT_STR) {
Paul Sokolovskybb33cc62014-01-20 00:59:25 +020027 print(env, "%s", qstr_str(q));
Paul Sokolovsky76d982e2014-01-13 19:19:16 +020028 } else {
29 // TODO need to escape chars etc
Paul Sokolovskybb33cc62014-01-20 00:59:25 +020030 print(env, "'%s'", qstr_str(q));
Paul Sokolovsky76d982e2014-01-13 19:19:16 +020031 }
Damiend99b0522013-12-21 18:17:45 +000032}
33
Paul Sokolovskybb33cc62014-01-20 00:59:25 +020034void str_print(void (*print)(void *env, const char *fmt, ...), void *env, mp_obj_t self_in, mp_print_kind_t kind) {
35 mp_obj_str_t *self = self_in;
36 mp_obj_str_print_qstr(print, env, self->qstr, kind);
37}
38
Damiend99b0522013-12-21 18:17:45 +000039mp_obj_t str_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) {
40 mp_obj_str_t *lhs = lhs_in;
41 const char *lhs_str = qstr_str(lhs->qstr);
42 switch (op) {
43 case RT_BINARY_OP_SUBSCR:
Paul Sokolovsky31ba60f2014-01-03 02:51:16 +020044 // TODO: need predicate to check for int-like type (bools are such for example)
45 // ["no", "yes"][1 == 2] is common idiom
46 if (MP_OBJ_IS_SMALL_INT(rhs_in)) {
Paul Sokolovsky7380a832014-01-21 02:22:02 +020047 uint index = mp_get_index(lhs->base.type, strlen(lhs_str), rhs_in);
48 return mp_obj_new_str(qstr_from_strn_copy(lhs_str + index, 1));
Paul Sokolovskye606cb62014-01-04 01:34:23 +020049#if MICROPY_ENABLE_SLICE
Paul Sokolovsky31ba60f2014-01-03 02:51:16 +020050 } else if (MP_OBJ_IS_TYPE(rhs_in, &slice_type)) {
Damien Georgec8d13842014-01-04 01:06:10 +000051 machine_int_t start, stop, step;
Paul Sokolovsky31ba60f2014-01-03 02:51:16 +020052 mp_obj_slice_get(rhs_in, &start, &stop, &step);
53 assert(step == 1);
Paul Sokolovskydecad082014-01-03 23:36:56 +020054 int len = strlen(lhs_str);
55 if (start < 0) {
56 start = len + start;
Paul Sokolovsky6ee1e382014-01-04 03:47:34 +020057 if (start < 0) {
58 start = 0;
59 }
60 } else if (start > len) {
61 start = len;
Paul Sokolovskydecad082014-01-03 23:36:56 +020062 }
63 if (stop <= 0) {
64 stop = len + stop;
Paul Sokolovsky6ee1e382014-01-04 03:47:34 +020065 // CPython returns empty string in such case
66 if (stop < 0) {
67 stop = start;
68 }
69 } else if (stop > len) {
70 stop = len;
Paul Sokolovskydecad082014-01-03 23:36:56 +020071 }
Paul Sokolovsky31ba60f2014-01-03 02:51:16 +020072 return mp_obj_new_str(qstr_from_strn_copy(lhs_str + start, stop - start));
Paul Sokolovskye606cb62014-01-04 01:34:23 +020073#endif
Paul Sokolovsky31ba60f2014-01-03 02:51:16 +020074 } else {
Paul Sokolovskyf8b9d3c2014-01-04 01:38:26 +020075 // Message doesn't match CPython, but we don't have so much bytes as they
76 // to spend them on verbose wording
Damien Georgeeb7bfcb2014-01-04 15:57:35 +000077 nlr_jump(mp_obj_new_exception_msg(MP_QSTR_TypeError, "index must be int"));
Paul Sokolovsky31ba60f2014-01-03 02:51:16 +020078 }
Damiend99b0522013-12-21 18:17:45 +000079
80 case RT_BINARY_OP_ADD:
81 case RT_BINARY_OP_INPLACE_ADD:
82 if (MP_OBJ_IS_TYPE(rhs_in, &str_type)) {
83 // add 2 strings
84 const char *rhs_str = qstr_str(((mp_obj_str_t*)rhs_in)->qstr);
Damien Georgefe8fb912014-01-02 16:36:09 +000085 size_t lhs_len = strlen(lhs_str);
86 size_t rhs_len = strlen(rhs_str);
87 int alloc_len = lhs_len + rhs_len + 1;
Damien732407f2013-12-29 19:33:23 +000088 char *val = m_new(char, alloc_len);
Damien Georgefe8fb912014-01-02 16:36:09 +000089 memcpy(val, lhs_str, lhs_len);
90 memcpy(val + lhs_len, rhs_str, rhs_len);
91 val[lhs_len + rhs_len] = '\0';
Damien732407f2013-12-29 19:33:23 +000092 return mp_obj_new_str(qstr_from_str_take(val, alloc_len));
Damiend99b0522013-12-21 18:17:45 +000093 }
94 break;
John R. Lentonc1bef212014-01-11 12:39:33 +000095 case RT_COMPARE_OP_IN:
96 case RT_COMPARE_OP_NOT_IN:
97 /* NOTE `a in b` is `b.__contains__(a)` */
98 if (MP_OBJ_IS_TYPE(rhs_in, &str_type)) {
99 const char *rhs_str = qstr_str(((mp_obj_str_t*)rhs_in)->qstr);
100 /* FIXME \0 in strs */
101 return MP_BOOL((op == RT_COMPARE_OP_IN) ^ (strstr(lhs_str, rhs_str) == NULL));
102 }
103 break;
Paul Sokolovsky545591a2014-01-21 00:27:33 +0200104 case RT_BINARY_OP_MULTIPLY:
105 {
106 if (!MP_OBJ_IS_SMALL_INT(rhs_in)) {
107 return NULL;
108 }
109 int n = MP_OBJ_SMALL_INT_VALUE(rhs_in);
110 size_t len = strlen(lhs_str);
111 char *s = m_new(char, len * n + 1);
112 s[len * n] = 0;
113 mp_seq_multiply(lhs_str, sizeof(*lhs_str), len, n, s);
114 return MP_OBJ_NEW_QSTR(qstr_from_str_take(s, len * n + 1));
115 }
Damiend99b0522013-12-21 18:17:45 +0000116 }
117
118 return MP_OBJ_NULL; // op not supported
119}
120
xyb8cfc9f02014-01-05 18:47:51 +0800121static mp_obj_t str_getiter(mp_obj_t o_in) {
122 return mp_obj_new_str_iterator(o_in, 0);
123}
124
Damiend99b0522013-12-21 18:17:45 +0000125mp_obj_t str_join(mp_obj_t self_in, mp_obj_t arg) {
126 assert(MP_OBJ_IS_TYPE(self_in, &str_type));
127 mp_obj_str_t *self = self_in;
Damiend99b0522013-12-21 18:17:45 +0000128
Damien Georgefe8fb912014-01-02 16:36:09 +0000129 // get separation string
130 const char *sep_str = qstr_str(self->qstr);
131 size_t sep_len = strlen(sep_str);
132
133 // process args
Damiend99b0522013-12-21 18:17:45 +0000134 uint seq_len;
135 mp_obj_t *seq_items;
136 if (MP_OBJ_IS_TYPE(arg, &tuple_type)) {
137 mp_obj_tuple_get(arg, &seq_len, &seq_items);
138 } else if (MP_OBJ_IS_TYPE(arg, &list_type)) {
139 mp_obj_list_get(arg, &seq_len, &seq_items);
140 } else {
141 goto bad_arg;
142 }
Damien Georgefe8fb912014-01-02 16:36:09 +0000143
144 // count required length
145 int required_len = 0;
Damiend99b0522013-12-21 18:17:45 +0000146 for (int i = 0; i < seq_len; i++) {
147 if (!MP_OBJ_IS_TYPE(seq_items[i], &str_type)) {
148 goto bad_arg;
149 }
Damien Georgefe8fb912014-01-02 16:36:09 +0000150 if (i > 0) {
151 required_len += sep_len;
152 }
Damiend99b0522013-12-21 18:17:45 +0000153 required_len += strlen(qstr_str(mp_obj_str_get(seq_items[i])));
154 }
155
156 // make joined string
157 char *joined_str = m_new(char, required_len + 1);
Damien Georgefe8fb912014-01-02 16:36:09 +0000158 char *s_dest = joined_str;
Damiend99b0522013-12-21 18:17:45 +0000159 for (int i = 0; i < seq_len; i++) {
Damiend99b0522013-12-21 18:17:45 +0000160 if (i > 0) {
Damien Georgefe8fb912014-01-02 16:36:09 +0000161 memcpy(s_dest, sep_str, sep_len);
162 s_dest += sep_len;
Damiend99b0522013-12-21 18:17:45 +0000163 }
Damien Georgefe8fb912014-01-02 16:36:09 +0000164 const char *s2 = qstr_str(mp_obj_str_get(seq_items[i]));
165 size_t s2_len = strlen(s2);
166 memcpy(s_dest, s2, s2_len);
167 s_dest += s2_len;
Damiend99b0522013-12-21 18:17:45 +0000168 }
Damien Georgefe8fb912014-01-02 16:36:09 +0000169 *s_dest = '\0';
170
171 // return joined string
Damien732407f2013-12-29 19:33:23 +0000172 return mp_obj_new_str(qstr_from_str_take(joined_str, required_len + 1));
Damiend99b0522013-12-21 18:17:45 +0000173
174bad_arg:
Damien Georgeeb7bfcb2014-01-04 15:57:35 +0000175 nlr_jump(mp_obj_new_exception_msg(MP_QSTR_TypeError, "?str.join expecting a list of str's"));
Damiend99b0522013-12-21 18:17:45 +0000176}
177
// True when c is an ASCII whitespace character. The set is the same one
// str_strip uses as its default strip set (" \t\n\r\v\f"), so that splitting
// on whitespace also breaks on newlines and other line/page separators.
// '\0' is never whitespace, so scanning loops stop at the end of a string.
// The argument is evaluated more than once: pass side-effect-free expressions.
#define is_ws(c) ((c) == ' ' || (c) == '\t' || (c) == '\n' || (c) == '\r' || (c) == '\v' || (c) == '\f')
179
180static mp_obj_t str_split(uint n_args, const mp_obj_t *args) {
181 int splits = -1;
182 mp_obj_t sep = mp_const_none;
183 if (n_args > 1) {
184 sep = args[1];
185 if (n_args > 2) {
186 splits = MP_OBJ_SMALL_INT_VALUE(args[2]);
187 }
188 }
189 assert(sep == mp_const_none);
190 mp_obj_t res = mp_obj_new_list(0, NULL);
191 const char *s = qstr_str(mp_obj_str_get(args[0]));
192 const char *start;
193
194 // Initial whitespace is not counted as split, so we pre-do it
195 while (is_ws(*s)) s++;
196 while (*s && splits != 0) {
197 start = s;
198 while (*s != 0 && !is_ws(*s)) s++;
199 rt_list_append(res, MP_OBJ_NEW_QSTR(qstr_from_strn_copy(start, s - start)));
200 if (*s == 0) {
201 break;
202 }
203 while (is_ws(*s)) s++;
204 if (splits > 0) {
205 splits--;
206 }
207 }
208
209 if (*s != 0) {
210 rt_list_append(res, MP_OBJ_NEW_QSTR(qstr_from_strn_copy(s, strlen(s))));
211 }
212
213 return res;
214}
215
// Return true when character `c` occurs within the first `str_len` bytes of
// `str`, false otherwise (including when str_len is 0).
static bool chr_in_str(const char* const str, const size_t str_len, const char c) {
    return memchr(str, c, str_len) != NULL;
}
224
Damien Georgea11ceca2014-01-19 16:02:09 +0000225static mp_obj_t str_find(uint n_args, const mp_obj_t *args) {
John R. Lentone8204912014-01-12 21:53:52 +0000226 assert(2 <= n_args && n_args <= 4);
Paul Sokolovsky8965a5e2014-01-20 23:33:19 +0200227 const char* haystack = qstr_str(mp_obj_str_get(args[0]));
228 const char* needle = qstr_str(mp_obj_str_get(args[1]));
John R. Lentone8204912014-01-12 21:53:52 +0000229
Damien George23005372014-01-13 19:39:01 +0000230 size_t haystack_len = strlen(haystack);
231 size_t needle_len = strlen(needle);
John R. Lentone8204912014-01-12 21:53:52 +0000232
233 size_t start = 0;
234 size_t end = haystack_len;
235 /* TODO use a non-exception-throwing mp_get_index */
236 if (n_args >= 3 && args[2] != mp_const_none) {
237 start = mp_get_index(&str_type, haystack_len, args[2]);
238 }
239 if (n_args >= 4 && args[3] != mp_const_none) {
240 end = mp_get_index(&str_type, haystack_len, args[3]);
241 }
242
243 char *p = strstr(haystack + start, needle);
Damien George23005372014-01-13 19:39:01 +0000244 if (p == NULL) {
245 // not found
246 return MP_OBJ_NEW_SMALL_INT(-1);
247 } else {
248 // found
249 machine_int_t pos = p - haystack;
John R. Lentone8204912014-01-12 21:53:52 +0000250 if (pos + needle_len > end) {
251 pos = -1;
252 }
Damien George23005372014-01-13 19:39:01 +0000253 return MP_OBJ_NEW_SMALL_INT(pos);
John R. Lentone8204912014-01-12 21:53:52 +0000254 }
John R. Lentone8204912014-01-12 21:53:52 +0000255}
256
Damien Georgea11ceca2014-01-19 16:02:09 +0000257mp_obj_t str_strip(uint n_args, const mp_obj_t *args) {
xbe7b0f39f2014-01-08 14:23:45 -0800258 assert(1 <= n_args && n_args <= 2);
259 assert(MP_OBJ_IS_TYPE(args[0], &str_type));
260 const char *chars_to_del;
261 static const char whitespace[] = " \t\n\r\v\f";
262
263 if (n_args == 1) {
264 chars_to_del = whitespace;
265 } else {
Paul Sokolovsky8965a5e2014-01-20 23:33:19 +0200266 chars_to_del = qstr_str(mp_obj_str_get(args[1]));
xbe7b0f39f2014-01-08 14:23:45 -0800267 }
268
269 const size_t chars_to_del_len = strlen(chars_to_del);
Paul Sokolovsky8965a5e2014-01-20 23:33:19 +0200270 const char *orig_str = qstr_str(mp_obj_str_get(args[0]));
xbe7b0f39f2014-01-08 14:23:45 -0800271 const size_t orig_str_len = strlen(orig_str);
272
273 size_t first_good_char_pos = 0;
274 bool first_good_char_pos_set = false;
275 size_t last_good_char_pos = 0;
276 for (size_t i = 0; i < orig_str_len; i++) {
277 if (!chr_in_str(chars_to_del, chars_to_del_len, orig_str[i])) {
278 last_good_char_pos = i;
279 if (!first_good_char_pos_set) {
280 first_good_char_pos = i;
281 first_good_char_pos_set = true;
282 }
283 }
284 }
285
286 if (first_good_char_pos == 0 && last_good_char_pos == 0) {
287 //string is all whitespace, return '\0'
288 char *empty = m_new(char, 1);
289 empty[0] = '\0';
290 return mp_obj_new_str(qstr_from_str_take(empty, 1));
291 }
292
293 assert(last_good_char_pos >= first_good_char_pos);
294 //+1 to accomodate the last character
295 size_t stripped_len = last_good_char_pos - first_good_char_pos + 1;
296 //+1 to accomodate '\0'
297 char *stripped_str = m_new(char, stripped_len + 1);
Damien George2d454292014-01-09 22:04:45 +0000298 memcpy(stripped_str, orig_str + first_good_char_pos, stripped_len);
xbe7b0f39f2014-01-08 14:23:45 -0800299 stripped_str[stripped_len] = '\0';
300 return mp_obj_new_str(qstr_from_str_take(stripped_str, stripped_len + 1));
301}
302
Damien Georgea11ceca2014-01-19 16:02:09 +0000303mp_obj_t str_format(uint n_args, const mp_obj_t *args) {
Damiend99b0522013-12-21 18:17:45 +0000304 assert(MP_OBJ_IS_TYPE(args[0], &str_type));
305 mp_obj_str_t *self = args[0];
306
307 const char *str = qstr_str(self->qstr);
308 int arg_i = 1;
309 vstr_t *vstr = vstr_new();
310 for (; *str; str++) {
311 if (*str == '{') {
312 str++;
313 if (*str == '{') {
314 vstr_add_char(vstr, '{');
Paul Sokolovskyf2b796e2014-01-15 22:45:20 +0200315 } else {
316 while (*str != '}') str++;
Damiend99b0522013-12-21 18:17:45 +0000317 if (arg_i >= n_args) {
Damien Georgeeb7bfcb2014-01-04 15:57:35 +0000318 nlr_jump(mp_obj_new_exception_msg(MP_QSTR_IndexError, "tuple index out of range"));
Damiend99b0522013-12-21 18:17:45 +0000319 }
Paul Sokolovsky76d982e2014-01-13 19:19:16 +0200320 // TODO: may be PRINT_REPR depending on formatting code
Damien George4899ff92014-01-15 22:39:03 +0000321 mp_obj_print_helper((void (*)(void*, const char*, ...))vstr_printf, vstr, args[arg_i], PRINT_STR);
Damiend99b0522013-12-21 18:17:45 +0000322 arg_i++;
323 }
324 } else {
325 vstr_add_char(vstr, *str);
326 }
327 }
328
Damien732407f2013-12-29 19:33:23 +0000329 return mp_obj_new_str(qstr_from_str_take(vstr->buf, vstr->alloc));
Damiend99b0522013-12-21 18:17:45 +0000330}
331
John R. Lentone8204912014-01-12 21:53:52 +0000332static MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_find_obj, 2, 4, str_find);
Damiend99b0522013-12-21 18:17:45 +0000333static MP_DEFINE_CONST_FUN_OBJ_2(str_join_obj, str_join);
Paul Sokolovsky4c316552014-01-21 05:00:21 +0200334static MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_split_obj, 1, 3, str_split);
xbe7b0f39f2014-01-08 14:23:45 -0800335static MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_strip_obj, 1, 2, str_strip);
Damiend99b0522013-12-21 18:17:45 +0000336static MP_DEFINE_CONST_FUN_OBJ_VAR(str_format_obj, 1, str_format);
337
ian-va5a01df2014-01-06 14:14:11 -0800338static const mp_method_t str_type_methods[] = {
John R. Lentone8204912014-01-12 21:53:52 +0000339 { "find", &str_find_obj },
ian-v7a16fad2014-01-06 09:52:29 -0800340 { "join", &str_join_obj },
Paul Sokolovsky4c316552014-01-21 05:00:21 +0200341 { "split", &str_split_obj },
xbe7b0f39f2014-01-08 14:23:45 -0800342 { "strip", &str_strip_obj },
ian-v7a16fad2014-01-06 09:52:29 -0800343 { "format", &str_format_obj },
344 { NULL, NULL }, // end-of-list sentinel
345};
Damien George97209d32014-01-07 15:58:30 +0000346
Damiend99b0522013-12-21 18:17:45 +0000347const mp_obj_type_t str_type = {
348 { &mp_const_type },
349 "str",
Paul Sokolovsky860ffb02014-01-05 22:34:09 +0200350 .print = str_print,
351 .binary_op = str_binary_op,
352 .getiter = str_getiter,
ian-v7a16fad2014-01-06 09:52:29 -0800353 .methods = str_type_methods,
Damiend99b0522013-12-21 18:17:45 +0000354};
355
356mp_obj_t mp_obj_new_str(qstr qstr) {
357 mp_obj_str_t *o = m_new_obj(mp_obj_str_t);
358 o->base.type = &str_type;
359 o->qstr = qstr;
360 return o;
361}
362
363qstr mp_obj_str_get(mp_obj_t self_in) {
Paul Sokolovsky8965a5e2014-01-20 23:33:19 +0200364 if (MP_OBJ_IS_QSTR(self_in)) {
365 return MP_OBJ_QSTR_VALUE(self_in);
366 }
367 if (MP_OBJ_IS_TYPE(self_in, &str_type)) {
368 mp_obj_str_t *self = self_in;
369 return self->qstr;
370 }
371 nlr_jump(mp_obj_new_exception_msg_varg(MP_QSTR_TypeError, "Can't convert '%s' object to str implicitly",
372 mp_obj_get_type_str(self_in)));
Damiend99b0522013-12-21 18:17:45 +0000373}
xyb8cfc9f02014-01-05 18:47:51 +0800374
375/******************************************************************************/
376/* str iterator */
377
378typedef struct _mp_obj_str_it_t {
379 mp_obj_base_t base;
380 mp_obj_str_t *str;
381 machine_uint_t cur;
382} mp_obj_str_it_t;
383
384mp_obj_t str_it_iternext(mp_obj_t self_in) {
385 mp_obj_str_it_t *self = self_in;
386 const char *str = qstr_str(self->str->qstr);
387 if (self->cur < strlen(str)) {
388 mp_obj_t o_out = mp_obj_new_str(qstr_from_strn_copy(str + self->cur, 1));
389 self->cur += 1;
390 return o_out;
391 } else {
392 return mp_const_stop_iteration;
393 }
394}
395
396static const mp_obj_type_t str_it_type = {
397 { &mp_const_type },
398 "str_iterator",
Paul Sokolovsky860ffb02014-01-05 22:34:09 +0200399 .iternext = str_it_iternext,
xyb8cfc9f02014-01-05 18:47:51 +0800400};
401
402mp_obj_t mp_obj_new_str_iterator(mp_obj_str_t *str, int cur) {
403 mp_obj_str_it_t *o = m_new_obj(mp_obj_str_it_t);
404 o->base.type = &str_it_type;
405 o->str = str;
406 o->cur = cur;
407 return o;
408}