blob: 81fd952915c20ff9ec210ff3c58496f93a1b15da [file] [log] [blame]
Damiend99b0522013-12-21 18:17:45 +00001#include <stdlib.h>
2#include <stdint.h>
3#include <stdarg.h>
4#include <string.h>
5#include <assert.h>
6
7#include "nlr.h"
8#include "misc.h"
9#include "mpconfig.h"
Damien Georgeeb7bfcb2014-01-04 15:57:35 +000010#include "mpqstr.h"
Damiend99b0522013-12-21 18:17:45 +000011#include "obj.h"
12#include "runtime0.h"
13#include "runtime.h"
14
15typedef struct _mp_obj_str_t {
16 mp_obj_base_t base;
17 qstr qstr;
18} mp_obj_str_t;
19
xyb8cfc9f02014-01-05 18:47:51 +080020static mp_obj_t mp_obj_new_str_iterator(mp_obj_str_t *str, int cur);
21
22/******************************************************************************/
23/* str */
24
Paul Sokolovsky76d982e2014-01-13 19:19:16 +020025void str_print(void (*print)(void *env, const char *fmt, ...), void *env, mp_obj_t self_in, mp_print_kind_t kind) {
Damiend99b0522013-12-21 18:17:45 +000026 mp_obj_str_t *self = self_in;
Paul Sokolovsky76d982e2014-01-13 19:19:16 +020027 if (kind == PRINT_STR) {
28 print(env, "%s", qstr_str(self->qstr));
29 } else {
30 // TODO need to escape chars etc
31 print(env, "'%s'", qstr_str(self->qstr));
32 }
Damiend99b0522013-12-21 18:17:45 +000033}
34
35mp_obj_t str_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) {
36 mp_obj_str_t *lhs = lhs_in;
37 const char *lhs_str = qstr_str(lhs->qstr);
38 switch (op) {
39 case RT_BINARY_OP_SUBSCR:
Paul Sokolovsky31ba60f2014-01-03 02:51:16 +020040 // TODO: need predicate to check for int-like type (bools are such for example)
41 // ["no", "yes"][1 == 2] is common idiom
42 if (MP_OBJ_IS_SMALL_INT(rhs_in)) {
43 // TODO: This implements byte string access for single index so far
Paul Sokolovskyf8b9d3c2014-01-04 01:38:26 +020044 // TODO: Handle negative indexes.
Paul Sokolovsky31ba60f2014-01-03 02:51:16 +020045 return mp_obj_new_int(lhs_str[mp_obj_get_int(rhs_in)]);
Paul Sokolovskye606cb62014-01-04 01:34:23 +020046#if MICROPY_ENABLE_SLICE
Paul Sokolovsky31ba60f2014-01-03 02:51:16 +020047 } else if (MP_OBJ_IS_TYPE(rhs_in, &slice_type)) {
Damien Georgec8d13842014-01-04 01:06:10 +000048 machine_int_t start, stop, step;
Paul Sokolovsky31ba60f2014-01-03 02:51:16 +020049 mp_obj_slice_get(rhs_in, &start, &stop, &step);
50 assert(step == 1);
Paul Sokolovskydecad082014-01-03 23:36:56 +020051 int len = strlen(lhs_str);
52 if (start < 0) {
53 start = len + start;
Paul Sokolovsky6ee1e382014-01-04 03:47:34 +020054 if (start < 0) {
55 start = 0;
56 }
57 } else if (start > len) {
58 start = len;
Paul Sokolovskydecad082014-01-03 23:36:56 +020059 }
60 if (stop <= 0) {
61 stop = len + stop;
Paul Sokolovsky6ee1e382014-01-04 03:47:34 +020062 // CPython returns empty string in such case
63 if (stop < 0) {
64 stop = start;
65 }
66 } else if (stop > len) {
67 stop = len;
Paul Sokolovskydecad082014-01-03 23:36:56 +020068 }
Paul Sokolovsky31ba60f2014-01-03 02:51:16 +020069 return mp_obj_new_str(qstr_from_strn_copy(lhs_str + start, stop - start));
Paul Sokolovskye606cb62014-01-04 01:34:23 +020070#endif
Paul Sokolovsky31ba60f2014-01-03 02:51:16 +020071 } else {
Paul Sokolovskyf8b9d3c2014-01-04 01:38:26 +020072 // Message doesn't match CPython, but we don't have so much bytes as they
73 // to spend them on verbose wording
Damien Georgeeb7bfcb2014-01-04 15:57:35 +000074 nlr_jump(mp_obj_new_exception_msg(MP_QSTR_TypeError, "index must be int"));
Paul Sokolovsky31ba60f2014-01-03 02:51:16 +020075 }
Damiend99b0522013-12-21 18:17:45 +000076
77 case RT_BINARY_OP_ADD:
78 case RT_BINARY_OP_INPLACE_ADD:
79 if (MP_OBJ_IS_TYPE(rhs_in, &str_type)) {
80 // add 2 strings
81 const char *rhs_str = qstr_str(((mp_obj_str_t*)rhs_in)->qstr);
Damien Georgefe8fb912014-01-02 16:36:09 +000082 size_t lhs_len = strlen(lhs_str);
83 size_t rhs_len = strlen(rhs_str);
84 int alloc_len = lhs_len + rhs_len + 1;
Damien732407f2013-12-29 19:33:23 +000085 char *val = m_new(char, alloc_len);
Damien Georgefe8fb912014-01-02 16:36:09 +000086 memcpy(val, lhs_str, lhs_len);
87 memcpy(val + lhs_len, rhs_str, rhs_len);
88 val[lhs_len + rhs_len] = '\0';
Damien732407f2013-12-29 19:33:23 +000089 return mp_obj_new_str(qstr_from_str_take(val, alloc_len));
Damiend99b0522013-12-21 18:17:45 +000090 }
91 break;
John R. Lentonc1bef212014-01-11 12:39:33 +000092 case RT_COMPARE_OP_IN:
93 case RT_COMPARE_OP_NOT_IN:
94 /* NOTE `a in b` is `b.__contains__(a)` */
95 if (MP_OBJ_IS_TYPE(rhs_in, &str_type)) {
96 const char *rhs_str = qstr_str(((mp_obj_str_t*)rhs_in)->qstr);
97 /* FIXME \0 in strs */
98 return MP_BOOL((op == RT_COMPARE_OP_IN) ^ (strstr(lhs_str, rhs_str) == NULL));
99 }
100 break;
Damiend99b0522013-12-21 18:17:45 +0000101 }
102
103 return MP_OBJ_NULL; // op not supported
104}
105
xyb8cfc9f02014-01-05 18:47:51 +0800106static mp_obj_t str_getiter(mp_obj_t o_in) {
107 return mp_obj_new_str_iterator(o_in, 0);
108}
109
Damiend99b0522013-12-21 18:17:45 +0000110mp_obj_t str_join(mp_obj_t self_in, mp_obj_t arg) {
111 assert(MP_OBJ_IS_TYPE(self_in, &str_type));
112 mp_obj_str_t *self = self_in;
Damiend99b0522013-12-21 18:17:45 +0000113
Damien Georgefe8fb912014-01-02 16:36:09 +0000114 // get separation string
115 const char *sep_str = qstr_str(self->qstr);
116 size_t sep_len = strlen(sep_str);
117
118 // process args
Damiend99b0522013-12-21 18:17:45 +0000119 uint seq_len;
120 mp_obj_t *seq_items;
121 if (MP_OBJ_IS_TYPE(arg, &tuple_type)) {
122 mp_obj_tuple_get(arg, &seq_len, &seq_items);
123 } else if (MP_OBJ_IS_TYPE(arg, &list_type)) {
124 mp_obj_list_get(arg, &seq_len, &seq_items);
125 } else {
126 goto bad_arg;
127 }
Damien Georgefe8fb912014-01-02 16:36:09 +0000128
129 // count required length
130 int required_len = 0;
Damiend99b0522013-12-21 18:17:45 +0000131 for (int i = 0; i < seq_len; i++) {
132 if (!MP_OBJ_IS_TYPE(seq_items[i], &str_type)) {
133 goto bad_arg;
134 }
Damien Georgefe8fb912014-01-02 16:36:09 +0000135 if (i > 0) {
136 required_len += sep_len;
137 }
Damiend99b0522013-12-21 18:17:45 +0000138 required_len += strlen(qstr_str(mp_obj_str_get(seq_items[i])));
139 }
140
141 // make joined string
142 char *joined_str = m_new(char, required_len + 1);
Damien Georgefe8fb912014-01-02 16:36:09 +0000143 char *s_dest = joined_str;
Damiend99b0522013-12-21 18:17:45 +0000144 for (int i = 0; i < seq_len; i++) {
Damiend99b0522013-12-21 18:17:45 +0000145 if (i > 0) {
Damien Georgefe8fb912014-01-02 16:36:09 +0000146 memcpy(s_dest, sep_str, sep_len);
147 s_dest += sep_len;
Damiend99b0522013-12-21 18:17:45 +0000148 }
Damien Georgefe8fb912014-01-02 16:36:09 +0000149 const char *s2 = qstr_str(mp_obj_str_get(seq_items[i]));
150 size_t s2_len = strlen(s2);
151 memcpy(s_dest, s2, s2_len);
152 s_dest += s2_len;
Damiend99b0522013-12-21 18:17:45 +0000153 }
Damien Georgefe8fb912014-01-02 16:36:09 +0000154 *s_dest = '\0';
155
156 // return joined string
Damien732407f2013-12-29 19:33:23 +0000157 return mp_obj_new_str(qstr_from_str_take(joined_str, required_len + 1));
Damiend99b0522013-12-21 18:17:45 +0000158
159bad_arg:
Damien Georgeeb7bfcb2014-01-04 15:57:35 +0000160 nlr_jump(mp_obj_new_exception_msg(MP_QSTR_TypeError, "?str.join expecting a list of str's"));
Damiend99b0522013-12-21 18:17:45 +0000161}
162
// Return true if the character c occurs among the first str_len characters
// of str, false otherwise (including when str_len is 0).
static bool chr_in_str(const char* const str, const size_t str_len, const char c) {
    const char *cursor = str;
    const char *const limit = str + str_len;
    while (cursor != limit) {
        if (*cursor++ == c) {
            return true;
        }
    }
    return false;
}
171
John R. Lentone8204912014-01-12 21:53:52 +0000172static mp_obj_t str_find(int n_args, const mp_obj_t *args) {
173 assert(2 <= n_args && n_args <= 4);
174 assert(MP_OBJ_IS_TYPE(args[0], &str_type));
175 if (!MP_OBJ_IS_TYPE(args[1], &str_type)) {
176 nlr_jump(mp_obj_new_exception_msg_1_arg(
177 MP_QSTR_TypeError,
178 "Can't convert '%s' object to str implicitly",
179 mp_obj_get_type_str(args[1])));
180 }
181
182 const char* haystack = qstr_str(((mp_obj_str_t*)args[0])->qstr);
183 const char* needle = qstr_str(((mp_obj_str_t*)args[1])->qstr);
184
Damien George23005372014-01-13 19:39:01 +0000185 size_t haystack_len = strlen(haystack);
186 size_t needle_len = strlen(needle);
John R. Lentone8204912014-01-12 21:53:52 +0000187
188 size_t start = 0;
189 size_t end = haystack_len;
190 /* TODO use a non-exception-throwing mp_get_index */
191 if (n_args >= 3 && args[2] != mp_const_none) {
192 start = mp_get_index(&str_type, haystack_len, args[2]);
193 }
194 if (n_args >= 4 && args[3] != mp_const_none) {
195 end = mp_get_index(&str_type, haystack_len, args[3]);
196 }
197
198 char *p = strstr(haystack + start, needle);
Damien George23005372014-01-13 19:39:01 +0000199 if (p == NULL) {
200 // not found
201 return MP_OBJ_NEW_SMALL_INT(-1);
202 } else {
203 // found
204 machine_int_t pos = p - haystack;
John R. Lentone8204912014-01-12 21:53:52 +0000205 if (pos + needle_len > end) {
206 pos = -1;
207 }
Damien George23005372014-01-13 19:39:01 +0000208 return MP_OBJ_NEW_SMALL_INT(pos);
John R. Lentone8204912014-01-12 21:53:52 +0000209 }
John R. Lentone8204912014-01-12 21:53:52 +0000210}
211
xbe7b0f39f2014-01-08 14:23:45 -0800212mp_obj_t str_strip(int n_args, const mp_obj_t *args) {
213 assert(1 <= n_args && n_args <= 2);
214 assert(MP_OBJ_IS_TYPE(args[0], &str_type));
215 const char *chars_to_del;
216 static const char whitespace[] = " \t\n\r\v\f";
217
218 if (n_args == 1) {
219 chars_to_del = whitespace;
220 } else {
221 assert(MP_OBJ_IS_TYPE(args[1], &str_type));
222 mp_obj_str_t *chars_to_del_obj = args[1];
223 chars_to_del = qstr_str(chars_to_del_obj->qstr);
224 }
225
226 const size_t chars_to_del_len = strlen(chars_to_del);
227 mp_obj_str_t *self = args[0];
228 const char *orig_str = qstr_str(self->qstr);
229 const size_t orig_str_len = strlen(orig_str);
230
231 size_t first_good_char_pos = 0;
232 bool first_good_char_pos_set = false;
233 size_t last_good_char_pos = 0;
234 for (size_t i = 0; i < orig_str_len; i++) {
235 if (!chr_in_str(chars_to_del, chars_to_del_len, orig_str[i])) {
236 last_good_char_pos = i;
237 if (!first_good_char_pos_set) {
238 first_good_char_pos = i;
239 first_good_char_pos_set = true;
240 }
241 }
242 }
243
244 if (first_good_char_pos == 0 && last_good_char_pos == 0) {
245 //string is all whitespace, return '\0'
246 char *empty = m_new(char, 1);
247 empty[0] = '\0';
248 return mp_obj_new_str(qstr_from_str_take(empty, 1));
249 }
250
251 assert(last_good_char_pos >= first_good_char_pos);
252 //+1 to accomodate the last character
253 size_t stripped_len = last_good_char_pos - first_good_char_pos + 1;
254 //+1 to accomodate '\0'
255 char *stripped_str = m_new(char, stripped_len + 1);
Damien George2d454292014-01-09 22:04:45 +0000256 memcpy(stripped_str, orig_str + first_good_char_pos, stripped_len);
xbe7b0f39f2014-01-08 14:23:45 -0800257 stripped_str[stripped_len] = '\0';
258 return mp_obj_new_str(qstr_from_str_take(stripped_str, stripped_len + 1));
259}
260
// printf-style callback that appends its formatted output to a vstr.
// env is handed straight to vstr_vprintf as the destination, so callers
// must pass the target vstr as env.
void vstr_printf_wrapper(void *env, const char *fmt, ...) {
    va_list ap;
    va_start(ap, fmt);
    vstr_vprintf(env, fmt, ap);
    va_end(ap);
}
267
268mp_obj_t str_format(int n_args, const mp_obj_t *args) {
269 assert(MP_OBJ_IS_TYPE(args[0], &str_type));
270 mp_obj_str_t *self = args[0];
271
272 const char *str = qstr_str(self->qstr);
273 int arg_i = 1;
274 vstr_t *vstr = vstr_new();
275 for (; *str; str++) {
276 if (*str == '{') {
277 str++;
278 if (*str == '{') {
279 vstr_add_char(vstr, '{');
280 } else if (*str == '}') {
281 if (arg_i >= n_args) {
Damien Georgeeb7bfcb2014-01-04 15:57:35 +0000282 nlr_jump(mp_obj_new_exception_msg(MP_QSTR_IndexError, "tuple index out of range"));
Damiend99b0522013-12-21 18:17:45 +0000283 }
Paul Sokolovsky76d982e2014-01-13 19:19:16 +0200284 // TODO: may be PRINT_REPR depending on formatting code
285 mp_obj_print_helper(vstr_printf_wrapper, vstr, args[arg_i], PRINT_STR);
Damiend99b0522013-12-21 18:17:45 +0000286 arg_i++;
287 }
288 } else {
289 vstr_add_char(vstr, *str);
290 }
291 }
292
Damien732407f2013-12-29 19:33:23 +0000293 return mp_obj_new_str(qstr_from_str_take(vstr->buf, vstr->alloc));
Damiend99b0522013-12-21 18:17:45 +0000294}
295
John R. Lentone8204912014-01-12 21:53:52 +0000296static MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_find_obj, 2, 4, str_find);
Damiend99b0522013-12-21 18:17:45 +0000297static MP_DEFINE_CONST_FUN_OBJ_2(str_join_obj, str_join);
xbe7b0f39f2014-01-08 14:23:45 -0800298static MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_strip_obj, 1, 2, str_strip);
Damiend99b0522013-12-21 18:17:45 +0000299static MP_DEFINE_CONST_FUN_OBJ_VAR(str_format_obj, 1, str_format);
300
ian-va5a01df2014-01-06 14:14:11 -0800301static const mp_method_t str_type_methods[] = {
John R. Lentone8204912014-01-12 21:53:52 +0000302 { "find", &str_find_obj },
ian-v7a16fad2014-01-06 09:52:29 -0800303 { "join", &str_join_obj },
xbe7b0f39f2014-01-08 14:23:45 -0800304 { "strip", &str_strip_obj },
ian-v7a16fad2014-01-06 09:52:29 -0800305 { "format", &str_format_obj },
306 { NULL, NULL }, // end-of-list sentinel
307};
Damien George97209d32014-01-07 15:58:30 +0000308
Damiend99b0522013-12-21 18:17:45 +0000309const mp_obj_type_t str_type = {
310 { &mp_const_type },
311 "str",
Paul Sokolovsky860ffb02014-01-05 22:34:09 +0200312 .print = str_print,
313 .binary_op = str_binary_op,
314 .getiter = str_getiter,
ian-v7a16fad2014-01-06 09:52:29 -0800315 .methods = str_type_methods,
Damiend99b0522013-12-21 18:17:45 +0000316};
317
318mp_obj_t mp_obj_new_str(qstr qstr) {
319 mp_obj_str_t *o = m_new_obj(mp_obj_str_t);
320 o->base.type = &str_type;
321 o->qstr = qstr;
322 return o;
323}
324
325qstr mp_obj_str_get(mp_obj_t self_in) {
326 assert(MP_OBJ_IS_TYPE(self_in, &str_type));
327 mp_obj_str_t *self = self_in;
328 return self->qstr;
329}
xyb8cfc9f02014-01-05 18:47:51 +0800330
331/******************************************************************************/
332/* str iterator */
333
334typedef struct _mp_obj_str_it_t {
335 mp_obj_base_t base;
336 mp_obj_str_t *str;
337 machine_uint_t cur;
338} mp_obj_str_it_t;
339
340mp_obj_t str_it_iternext(mp_obj_t self_in) {
341 mp_obj_str_it_t *self = self_in;
342 const char *str = qstr_str(self->str->qstr);
343 if (self->cur < strlen(str)) {
344 mp_obj_t o_out = mp_obj_new_str(qstr_from_strn_copy(str + self->cur, 1));
345 self->cur += 1;
346 return o_out;
347 } else {
348 return mp_const_stop_iteration;
349 }
350}
351
352static const mp_obj_type_t str_it_type = {
353 { &mp_const_type },
354 "str_iterator",
Paul Sokolovsky860ffb02014-01-05 22:34:09 +0200355 .iternext = str_it_iternext,
xyb8cfc9f02014-01-05 18:47:51 +0800356};
357
358mp_obj_t mp_obj_new_str_iterator(mp_obj_str_t *str, int cur) {
359 mp_obj_str_it_t *o = m_new_obj(mp_obj_str_it_t);
360 o->base.type = &str_it_type;
361 o->str = str;
362 o->cur = cur;
363 return o;
364}