blob: 8b7ab9692f769a9040bd769fc08b0a6f2e11446c [file] [log] [blame]
Damiend99b0522013-12-21 18:17:45 +00001#include <stdlib.h>
2#include <stdint.h>
3#include <stdarg.h>
4#include <string.h>
5#include <assert.h>
6
7#include "nlr.h"
8#include "misc.h"
9#include "mpconfig.h"
Damien Georgeeb7bfcb2014-01-04 15:57:35 +000010#include "mpqstr.h"
Damiend99b0522013-12-21 18:17:45 +000011#include "obj.h"
12#include "runtime0.h"
13#include "runtime.h"
14
15typedef struct _mp_obj_str_t {
16 mp_obj_base_t base;
17 qstr qstr;
18} mp_obj_str_t;
19
xyb8cfc9f02014-01-05 18:47:51 +080020static mp_obj_t mp_obj_new_str_iterator(mp_obj_str_t *str, int cur);
21
22/******************************************************************************/
23/* str */
24
Damiend99b0522013-12-21 18:17:45 +000025void str_print(void (*print)(void *env, const char *fmt, ...), void *env, mp_obj_t self_in) {
26 mp_obj_str_t *self = self_in;
27 // TODO need to escape chars etc
28 print(env, "'%s'", qstr_str(self->qstr));
29}
30
31mp_obj_t str_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) {
32 mp_obj_str_t *lhs = lhs_in;
33 const char *lhs_str = qstr_str(lhs->qstr);
34 switch (op) {
35 case RT_BINARY_OP_SUBSCR:
Paul Sokolovsky31ba60f2014-01-03 02:51:16 +020036 // TODO: need predicate to check for int-like type (bools are such for example)
37 // ["no", "yes"][1 == 2] is common idiom
38 if (MP_OBJ_IS_SMALL_INT(rhs_in)) {
39 // TODO: This implements byte string access for single index so far
Paul Sokolovskyf8b9d3c2014-01-04 01:38:26 +020040 // TODO: Handle negative indexes.
Paul Sokolovsky31ba60f2014-01-03 02:51:16 +020041 return mp_obj_new_int(lhs_str[mp_obj_get_int(rhs_in)]);
Paul Sokolovskye606cb62014-01-04 01:34:23 +020042#if MICROPY_ENABLE_SLICE
Paul Sokolovsky31ba60f2014-01-03 02:51:16 +020043 } else if (MP_OBJ_IS_TYPE(rhs_in, &slice_type)) {
Damien Georgec8d13842014-01-04 01:06:10 +000044 machine_int_t start, stop, step;
Paul Sokolovsky31ba60f2014-01-03 02:51:16 +020045 mp_obj_slice_get(rhs_in, &start, &stop, &step);
46 assert(step == 1);
Paul Sokolovskydecad082014-01-03 23:36:56 +020047 int len = strlen(lhs_str);
48 if (start < 0) {
49 start = len + start;
Paul Sokolovsky6ee1e382014-01-04 03:47:34 +020050 if (start < 0) {
51 start = 0;
52 }
53 } else if (start > len) {
54 start = len;
Paul Sokolovskydecad082014-01-03 23:36:56 +020055 }
56 if (stop <= 0) {
57 stop = len + stop;
Paul Sokolovsky6ee1e382014-01-04 03:47:34 +020058 // CPython returns empty string in such case
59 if (stop < 0) {
60 stop = start;
61 }
62 } else if (stop > len) {
63 stop = len;
Paul Sokolovskydecad082014-01-03 23:36:56 +020064 }
Paul Sokolovsky31ba60f2014-01-03 02:51:16 +020065 return mp_obj_new_str(qstr_from_strn_copy(lhs_str + start, stop - start));
Paul Sokolovskye606cb62014-01-04 01:34:23 +020066#endif
Paul Sokolovsky31ba60f2014-01-03 02:51:16 +020067 } else {
Paul Sokolovskyf8b9d3c2014-01-04 01:38:26 +020068 // Message doesn't match CPython, but we don't have so much bytes as they
69 // to spend them on verbose wording
Damien Georgeeb7bfcb2014-01-04 15:57:35 +000070 nlr_jump(mp_obj_new_exception_msg(MP_QSTR_TypeError, "index must be int"));
Paul Sokolovsky31ba60f2014-01-03 02:51:16 +020071 }
Damiend99b0522013-12-21 18:17:45 +000072
73 case RT_BINARY_OP_ADD:
74 case RT_BINARY_OP_INPLACE_ADD:
75 if (MP_OBJ_IS_TYPE(rhs_in, &str_type)) {
76 // add 2 strings
77 const char *rhs_str = qstr_str(((mp_obj_str_t*)rhs_in)->qstr);
Damien Georgefe8fb912014-01-02 16:36:09 +000078 size_t lhs_len = strlen(lhs_str);
79 size_t rhs_len = strlen(rhs_str);
80 int alloc_len = lhs_len + rhs_len + 1;
Damien732407f2013-12-29 19:33:23 +000081 char *val = m_new(char, alloc_len);
Damien Georgefe8fb912014-01-02 16:36:09 +000082 memcpy(val, lhs_str, lhs_len);
83 memcpy(val + lhs_len, rhs_str, rhs_len);
84 val[lhs_len + rhs_len] = '\0';
Damien732407f2013-12-29 19:33:23 +000085 return mp_obj_new_str(qstr_from_str_take(val, alloc_len));
Damiend99b0522013-12-21 18:17:45 +000086 }
87 break;
John R. Lentonc1bef212014-01-11 12:39:33 +000088 case RT_COMPARE_OP_IN:
89 case RT_COMPARE_OP_NOT_IN:
90 /* NOTE `a in b` is `b.__contains__(a)` */
91 if (MP_OBJ_IS_TYPE(rhs_in, &str_type)) {
92 const char *rhs_str = qstr_str(((mp_obj_str_t*)rhs_in)->qstr);
93 /* FIXME \0 in strs */
94 return MP_BOOL((op == RT_COMPARE_OP_IN) ^ (strstr(lhs_str, rhs_str) == NULL));
95 }
96 break;
Damiend99b0522013-12-21 18:17:45 +000097 }
98
99 return MP_OBJ_NULL; // op not supported
100}
101
xyb8cfc9f02014-01-05 18:47:51 +0800102static mp_obj_t str_getiter(mp_obj_t o_in) {
103 return mp_obj_new_str_iterator(o_in, 0);
104}
105
Damiend99b0522013-12-21 18:17:45 +0000106mp_obj_t str_join(mp_obj_t self_in, mp_obj_t arg) {
107 assert(MP_OBJ_IS_TYPE(self_in, &str_type));
108 mp_obj_str_t *self = self_in;
Damiend99b0522013-12-21 18:17:45 +0000109
Damien Georgefe8fb912014-01-02 16:36:09 +0000110 // get separation string
111 const char *sep_str = qstr_str(self->qstr);
112 size_t sep_len = strlen(sep_str);
113
114 // process args
Damiend99b0522013-12-21 18:17:45 +0000115 uint seq_len;
116 mp_obj_t *seq_items;
117 if (MP_OBJ_IS_TYPE(arg, &tuple_type)) {
118 mp_obj_tuple_get(arg, &seq_len, &seq_items);
119 } else if (MP_OBJ_IS_TYPE(arg, &list_type)) {
120 mp_obj_list_get(arg, &seq_len, &seq_items);
121 } else {
122 goto bad_arg;
123 }
Damien Georgefe8fb912014-01-02 16:36:09 +0000124
125 // count required length
126 int required_len = 0;
Damiend99b0522013-12-21 18:17:45 +0000127 for (int i = 0; i < seq_len; i++) {
128 if (!MP_OBJ_IS_TYPE(seq_items[i], &str_type)) {
129 goto bad_arg;
130 }
Damien Georgefe8fb912014-01-02 16:36:09 +0000131 if (i > 0) {
132 required_len += sep_len;
133 }
Damiend99b0522013-12-21 18:17:45 +0000134 required_len += strlen(qstr_str(mp_obj_str_get(seq_items[i])));
135 }
136
137 // make joined string
138 char *joined_str = m_new(char, required_len + 1);
Damien Georgefe8fb912014-01-02 16:36:09 +0000139 char *s_dest = joined_str;
Damiend99b0522013-12-21 18:17:45 +0000140 for (int i = 0; i < seq_len; i++) {
Damiend99b0522013-12-21 18:17:45 +0000141 if (i > 0) {
Damien Georgefe8fb912014-01-02 16:36:09 +0000142 memcpy(s_dest, sep_str, sep_len);
143 s_dest += sep_len;
Damiend99b0522013-12-21 18:17:45 +0000144 }
Damien Georgefe8fb912014-01-02 16:36:09 +0000145 const char *s2 = qstr_str(mp_obj_str_get(seq_items[i]));
146 size_t s2_len = strlen(s2);
147 memcpy(s_dest, s2, s2_len);
148 s_dest += s2_len;
Damiend99b0522013-12-21 18:17:45 +0000149 }
Damien Georgefe8fb912014-01-02 16:36:09 +0000150 *s_dest = '\0';
151
152 // return joined string
Damien732407f2013-12-29 19:33:23 +0000153 return mp_obj_new_str(qstr_from_str_take(joined_str, required_len + 1));
Damiend99b0522013-12-21 18:17:45 +0000154
155bad_arg:
Damien Georgeeb7bfcb2014-01-04 15:57:35 +0000156 nlr_jump(mp_obj_new_exception_msg(MP_QSTR_TypeError, "?str.join expecting a list of str's"));
Damiend99b0522013-12-21 18:17:45 +0000157}
158
// Reports whether byte c occurs among the first str_len bytes of str.
//   str     - buffer to search (need not be NUL-terminated within str_len)
//   str_len - number of bytes of str to examine; 0 always yields false
//   c       - byte to look for
// Uses the standard memchr instead of a hand-written scan.
static bool chr_in_str(const char* const str, const size_t str_len, const char c) {
    return memchr(str, c, str_len) != NULL;
}
167
John R. Lentone8204912014-01-12 21:53:52 +0000168static mp_obj_t str_find(int n_args, const mp_obj_t *args) {
169 assert(2 <= n_args && n_args <= 4);
170 assert(MP_OBJ_IS_TYPE(args[0], &str_type));
171 if (!MP_OBJ_IS_TYPE(args[1], &str_type)) {
172 nlr_jump(mp_obj_new_exception_msg_1_arg(
173 MP_QSTR_TypeError,
174 "Can't convert '%s' object to str implicitly",
175 mp_obj_get_type_str(args[1])));
176 }
177
178 const char* haystack = qstr_str(((mp_obj_str_t*)args[0])->qstr);
179 const char* needle = qstr_str(((mp_obj_str_t*)args[1])->qstr);
180
Damien George23005372014-01-13 19:39:01 +0000181 size_t haystack_len = strlen(haystack);
182 size_t needle_len = strlen(needle);
John R. Lentone8204912014-01-12 21:53:52 +0000183
184 size_t start = 0;
185 size_t end = haystack_len;
186 /* TODO use a non-exception-throwing mp_get_index */
187 if (n_args >= 3 && args[2] != mp_const_none) {
188 start = mp_get_index(&str_type, haystack_len, args[2]);
189 }
190 if (n_args >= 4 && args[3] != mp_const_none) {
191 end = mp_get_index(&str_type, haystack_len, args[3]);
192 }
193
194 char *p = strstr(haystack + start, needle);
Damien George23005372014-01-13 19:39:01 +0000195 if (p == NULL) {
196 // not found
197 return MP_OBJ_NEW_SMALL_INT(-1);
198 } else {
199 // found
200 machine_int_t pos = p - haystack;
John R. Lentone8204912014-01-12 21:53:52 +0000201 if (pos + needle_len > end) {
202 pos = -1;
203 }
Damien George23005372014-01-13 19:39:01 +0000204 return MP_OBJ_NEW_SMALL_INT(pos);
John R. Lentone8204912014-01-12 21:53:52 +0000205 }
John R. Lentone8204912014-01-12 21:53:52 +0000206}
207
xbe7b0f39f2014-01-08 14:23:45 -0800208mp_obj_t str_strip(int n_args, const mp_obj_t *args) {
209 assert(1 <= n_args && n_args <= 2);
210 assert(MP_OBJ_IS_TYPE(args[0], &str_type));
211 const char *chars_to_del;
212 static const char whitespace[] = " \t\n\r\v\f";
213
214 if (n_args == 1) {
215 chars_to_del = whitespace;
216 } else {
217 assert(MP_OBJ_IS_TYPE(args[1], &str_type));
218 mp_obj_str_t *chars_to_del_obj = args[1];
219 chars_to_del = qstr_str(chars_to_del_obj->qstr);
220 }
221
222 const size_t chars_to_del_len = strlen(chars_to_del);
223 mp_obj_str_t *self = args[0];
224 const char *orig_str = qstr_str(self->qstr);
225 const size_t orig_str_len = strlen(orig_str);
226
227 size_t first_good_char_pos = 0;
228 bool first_good_char_pos_set = false;
229 size_t last_good_char_pos = 0;
230 for (size_t i = 0; i < orig_str_len; i++) {
231 if (!chr_in_str(chars_to_del, chars_to_del_len, orig_str[i])) {
232 last_good_char_pos = i;
233 if (!first_good_char_pos_set) {
234 first_good_char_pos = i;
235 first_good_char_pos_set = true;
236 }
237 }
238 }
239
240 if (first_good_char_pos == 0 && last_good_char_pos == 0) {
241 //string is all whitespace, return '\0'
242 char *empty = m_new(char, 1);
243 empty[0] = '\0';
244 return mp_obj_new_str(qstr_from_str_take(empty, 1));
245 }
246
247 assert(last_good_char_pos >= first_good_char_pos);
248 //+1 to accomodate the last character
249 size_t stripped_len = last_good_char_pos - first_good_char_pos + 1;
250 //+1 to accomodate '\0'
251 char *stripped_str = m_new(char, stripped_len + 1);
Damien George2d454292014-01-09 22:04:45 +0000252 memcpy(stripped_str, orig_str + first_good_char_pos, stripped_len);
xbe7b0f39f2014-01-08 14:23:45 -0800253 stripped_str[stripped_len] = '\0';
254 return mp_obj_new_str(qstr_from_str_take(stripped_str, stripped_len + 1));
255}
256
// Variadic adapter with the print-callback signature (void *env, fmt, ...):
// packs its arguments into a va_list and forwards them to vstr_vprintf.
// str_format passes a vstr_t* as env so formatted output lands in that vstr.
void vstr_printf_wrapper(void *env, const char *fmt, ...) {
    va_list ap;
    va_start(ap, fmt);
    vstr_vprintf(env, fmt, ap);
    va_end(ap);
}
263
264mp_obj_t str_format(int n_args, const mp_obj_t *args) {
265 assert(MP_OBJ_IS_TYPE(args[0], &str_type));
266 mp_obj_str_t *self = args[0];
267
268 const char *str = qstr_str(self->qstr);
269 int arg_i = 1;
270 vstr_t *vstr = vstr_new();
271 for (; *str; str++) {
272 if (*str == '{') {
273 str++;
274 if (*str == '{') {
275 vstr_add_char(vstr, '{');
276 } else if (*str == '}') {
277 if (arg_i >= n_args) {
Damien Georgeeb7bfcb2014-01-04 15:57:35 +0000278 nlr_jump(mp_obj_new_exception_msg(MP_QSTR_IndexError, "tuple index out of range"));
Damiend99b0522013-12-21 18:17:45 +0000279 }
280 mp_obj_print_helper(vstr_printf_wrapper, vstr, args[arg_i]);
281 arg_i++;
282 }
283 } else {
284 vstr_add_char(vstr, *str);
285 }
286 }
287
Damien732407f2013-12-29 19:33:23 +0000288 return mp_obj_new_str(qstr_from_str_take(vstr->buf, vstr->alloc));
Damiend99b0522013-12-21 18:17:45 +0000289}
290
John R. Lentone8204912014-01-12 21:53:52 +0000291static MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_find_obj, 2, 4, str_find);
Damiend99b0522013-12-21 18:17:45 +0000292static MP_DEFINE_CONST_FUN_OBJ_2(str_join_obj, str_join);
xbe7b0f39f2014-01-08 14:23:45 -0800293static MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_strip_obj, 1, 2, str_strip);
Damiend99b0522013-12-21 18:17:45 +0000294static MP_DEFINE_CONST_FUN_OBJ_VAR(str_format_obj, 1, str_format);
295
ian-va5a01df2014-01-06 14:14:11 -0800296static const mp_method_t str_type_methods[] = {
John R. Lentone8204912014-01-12 21:53:52 +0000297 { "find", &str_find_obj },
ian-v7a16fad2014-01-06 09:52:29 -0800298 { "join", &str_join_obj },
xbe7b0f39f2014-01-08 14:23:45 -0800299 { "strip", &str_strip_obj },
ian-v7a16fad2014-01-06 09:52:29 -0800300 { "format", &str_format_obj },
301 { NULL, NULL }, // end-of-list sentinel
302};
Damien George97209d32014-01-07 15:58:30 +0000303
Damiend99b0522013-12-21 18:17:45 +0000304const mp_obj_type_t str_type = {
305 { &mp_const_type },
306 "str",
Paul Sokolovsky860ffb02014-01-05 22:34:09 +0200307 .print = str_print,
308 .binary_op = str_binary_op,
309 .getiter = str_getiter,
ian-v7a16fad2014-01-06 09:52:29 -0800310 .methods = str_type_methods,
Damiend99b0522013-12-21 18:17:45 +0000311};
312
313mp_obj_t mp_obj_new_str(qstr qstr) {
314 mp_obj_str_t *o = m_new_obj(mp_obj_str_t);
315 o->base.type = &str_type;
316 o->qstr = qstr;
317 return o;
318}
319
320qstr mp_obj_str_get(mp_obj_t self_in) {
321 assert(MP_OBJ_IS_TYPE(self_in, &str_type));
322 mp_obj_str_t *self = self_in;
323 return self->qstr;
324}
xyb8cfc9f02014-01-05 18:47:51 +0800325
326/******************************************************************************/
327/* str iterator */
328
329typedef struct _mp_obj_str_it_t {
330 mp_obj_base_t base;
331 mp_obj_str_t *str;
332 machine_uint_t cur;
333} mp_obj_str_it_t;
334
335mp_obj_t str_it_iternext(mp_obj_t self_in) {
336 mp_obj_str_it_t *self = self_in;
337 const char *str = qstr_str(self->str->qstr);
338 if (self->cur < strlen(str)) {
339 mp_obj_t o_out = mp_obj_new_str(qstr_from_strn_copy(str + self->cur, 1));
340 self->cur += 1;
341 return o_out;
342 } else {
343 return mp_const_stop_iteration;
344 }
345}
346
347static const mp_obj_type_t str_it_type = {
348 { &mp_const_type },
349 "str_iterator",
Paul Sokolovsky860ffb02014-01-05 22:34:09 +0200350 .iternext = str_it_iternext,
xyb8cfc9f02014-01-05 18:47:51 +0800351};
352
353mp_obj_t mp_obj_new_str_iterator(mp_obj_str_t *str, int cur) {
354 mp_obj_str_it_t *o = m_new_obj(mp_obj_str_it_t);
355 o->base.type = &str_it_type;
356 o->str = str;
357 o->cur = cur;
358 return o;
359}