Blame - py/objstrunicode.c - lite/micropython

blob: b1b61d74fe56d6f6fa439179506cab00f345ab88 [file] [log] [blame]

Paul Sokolovsky	8386534	2014-06-13 00:51:34 +0300	[diff] [blame]	1	/*
				2	* This file is part of the Micro Python project, http://micropython.org/
				3	*
				4	* The MIT License (MIT)
				5	*
				6	* Copyright (c) 2013, 2014 Damien P. George
				7	* Copyright (c) 2014 Paul Sokolovsky
				8	*
				9	* Permission is hereby granted, free of charge, to any person obtaining a copy
				10	* of this software and associated documentation files (the "Software"), to deal
				11	* in the Software without restriction, including without limitation the rights
				12	* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
				13	* copies of the Software, and to permit persons to whom the Software is
				14	* furnished to do so, subject to the following conditions:
				15	*
				16	* The above copyright notice and this permission notice shall be included in
				17	* all copies or substantial portions of the Software.
				18	*
				19	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				20	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				21	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				22	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				23	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				24	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
				25	* THE SOFTWARE.
				26	*/
				27
				28	#include <stdbool.h>
				29	#include <string.h>
				30	#include <assert.h>
				31
				32	#include "mpconfig.h"
				33	#include "nlr.h"
				34	#include "misc.h"
				35	#include "qstr.h"
				36	#include "obj.h"
				37	#include "runtime0.h"
				38	#include "runtime.h"
				39	#include "pfenv.h"
				40	#include "objstr.h"
				41	#include "objlist.h"
				42
				43	STATIC mp_obj_t str_modulo_format(mp_obj_t pattern, uint n_args, const mp_obj_t *args, mp_obj_t dict);
				44	const mp_obj_t mp_const_empty_bytes;
				45
				46	// use this macro to extract the string hash
				47	#define GET_STR_HASH(str_obj_in, str_hash) uint str_hash; if (MP_OBJ_IS_QSTR(str_obj_in)) { str_hash = qstr_hash(MP_OBJ_QSTR_VALUE(str_obj_in)); } else { str_hash = ((mp_obj_str_t*)str_obj_in)->hash; }
				48
				49	// use this macro to extract the string length
				50	#define GET_STR_LEN(str_obj_in, str_len) uint str_len; if (MP_OBJ_IS_QSTR(str_obj_in)) { str_len = qstr_len(MP_OBJ_QSTR_VALUE(str_obj_in)); } else { str_len = ((mp_obj_str_t*)str_obj_in)->len; }
				51
				52	// use this macro to extract the string data and length
				53	#define GET_STR_DATA_LEN(str_obj_in, str_data, str_len) const byte str_data; uint str_len; if (MP_OBJ_IS_QSTR(str_obj_in)) { str_data = qstr_data(MP_OBJ_QSTR_VALUE(str_obj_in), &str_len); } else { str_len = ((mp_obj_str_t)str_obj_in)->len; str_data = ((mp_obj_str_t*)str_obj_in)->data; }
				54
				55	STATIC mp_obj_t mp_obj_new_str_iterator(mp_obj_t str);
				56	STATIC mp_obj_t mp_obj_new_bytes_iterator(mp_obj_t str);
				57	STATIC NORETURN void bad_implicit_conversion(mp_obj_t self_in);
				58	STATIC NORETURN void arg_type_mixup();
				59
				60	STATIC bool is_str_or_bytes(mp_obj_t o) {
				61	return MP_OBJ_IS_STR(o) \|\| MP_OBJ_IS_TYPE(o, &mp_type_bytes);
				62	}
				63
				64	/******************************************************************************/
				65	/* str */
				66
Chris Angelico	64b468d	2014-06-04 05:28:12 +1000	[diff] [blame^]	67	void mp_str_print_quoted(void (print)(void env, const char fmt, ...), void env, const byte *str_data, uint str_len, bool is_bytes) {
Paul Sokolovsky	8386534	2014-06-13 00:51:34 +0300	[diff] [blame]	68	// this escapes characters, but it will be very slow to print (calling print many times)
				69	bool has_single_quote = false;
				70	bool has_double_quote = false;
				71	for (const byte s = str_data, top = str_data + str_len; !has_double_quote && s < top; s++) {
				72	if (*s == '\'') {
				73	has_single_quote = true;
				74	} else if (*s == '"') {
				75	has_double_quote = true;
				76	}
				77	}
				78	int quote_char = '\'';
				79	if (has_single_quote && !has_double_quote) {
				80	quote_char = '"';
				81	}
				82	print(env, "%c", quote_char);
Chris Angelico	64b468d	2014-06-04 05:28:12 +1000	[diff] [blame^]	83	const char s = (const char )str_data, top = (const char )str_data + str_len;
				84	while (s < top) {
				85	unichar ch;
				86	if (is_bytes) {
				87	ch = (unsigned char )s++; // Don't sign-extend bytes
Paul Sokolovsky	8386534	2014-06-13 00:51:34 +0300	[diff] [blame]	88	} else {
Chris Angelico	64b468d	2014-06-04 05:28:12 +1000	[diff] [blame^]	89	ch = utf8_get_char(s);
				90	s = utf8_next_char(s);
				91	}
				92	if (ch == quote_char) {
				93	print(env, "\\%c", quote_char);
				94	} else if (ch == '\\') {
				95	print(env, "\\\\");
				96	} else if (32 <= ch && ch <= 126) {
				97	print(env, "%c", ch);
				98	} else if (ch == '\n') {
				99	print(env, "\\n");
				100	} else if (ch == '\r') {
				101	print(env, "\\r");
				102	} else if (ch == '\t') {
				103	print(env, "\\t");
				104	} else if (ch < 0x100) {
				105	print(env, "\\x%02x", ch);
				106	} else if (ch < 0x10000) {
				107	print(env, "\\u%04x", ch);
				108	} else {
				109	print(env, "\\U%08x", ch);
Paul Sokolovsky	8386534	2014-06-13 00:51:34 +0300	[diff] [blame]	110	}
				111	}
				112	print(env, "%c", quote_char);
				113	}
				114
				115	STATIC void str_print(void (print)(void env, const char fmt, ...), void env, mp_obj_t self_in, mp_print_kind_t kind) {
				116	GET_STR_DATA_LEN(self_in, str_data, str_len);
				117	bool is_bytes = MP_OBJ_IS_TYPE(self_in, &mp_type_bytes);
				118	if (kind == PRINT_STR && !is_bytes) {
				119	print(env, "%.*s", str_len, str_data);
				120	} else {
				121	if (is_bytes) {
				122	print(env, "b");
				123	}
Chris Angelico	64b468d	2014-06-04 05:28:12 +1000	[diff] [blame^]	124	mp_str_print_quoted(print, env, str_data, str_len, is_bytes);
Paul Sokolovsky	8386534	2014-06-13 00:51:34 +0300	[diff] [blame]	125	}
				126	}
				127
				128	STATIC mp_obj_t str_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const mp_obj_t *args) {
				129	#if MICROPY_CPYTHON_COMPAT
				130	if (n_kw != 0) {
				131	mp_arg_error_unimpl_kw();
				132	}
				133	#endif
				134
				135	switch (n_args) {
				136	case 0:
				137	return MP_OBJ_NEW_QSTR(MP_QSTR_);
				138
				139	case 1:
				140	{
				141	vstr_t *vstr = vstr_new();
				142	mp_obj_print_helper((void ()(void, const char*, ...))vstr_printf, vstr, args[0], PRINT_STR);
				143	mp_obj_t s = mp_obj_new_str(vstr->buf, vstr->len, false);
				144	vstr_free(vstr);
				145	return s;
				146	}
				147
				148	case 2:
				149	case 3:
				150	{
				151	// TODO: validate 2nd/3rd args
				152	if (!MP_OBJ_IS_TYPE(args[0], &mp_type_bytes)) {
				153	nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, "bytes expected"));
				154	}
				155	GET_STR_DATA_LEN(args[0], str_data, str_len);
				156	GET_STR_HASH(args[0], str_hash);
				157	mp_obj_str_t *o = mp_obj_new_str_of_type(&mp_type_str, NULL, str_len);
				158	o->data = str_data;
				159	o->hash = str_hash;
				160	return o;
				161	}
				162
				163	default:
				164	nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, "str takes at most 3 arguments"));
				165	}
				166	}
				167
				168	STATIC mp_obj_t bytes_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const mp_obj_t *args) {
				169	if (n_args == 0) {
				170	return mp_const_empty_bytes;
				171	}
				172
				173	#if MICROPY_CPYTHON_COMPAT
				174	if (n_kw != 0) {
				175	mp_arg_error_unimpl_kw();
				176	}
				177	#endif
				178
				179	if (MP_OBJ_IS_STR(args[0])) {
				180	if (n_args < 2 \|\| n_args > 3) {
				181	goto wrong_args;
				182	}
				183	GET_STR_DATA_LEN(args[0], str_data, str_len);
				184	GET_STR_HASH(args[0], str_hash);
				185	mp_obj_str_t *o = mp_obj_new_str_of_type(&mp_type_bytes, NULL, str_len);
				186	o->data = str_data;
				187	o->hash = str_hash;
				188	return o;
				189	}
				190
				191	if (n_args > 1) {
				192	goto wrong_args;
				193	}
				194
				195	if (MP_OBJ_IS_SMALL_INT(args[0])) {
				196	uint len = MP_OBJ_SMALL_INT_VALUE(args[0]);
				197	byte *data;
				198
				199	mp_obj_t o = mp_obj_str_builder_start(&mp_type_bytes, len, &data);
				200	memset(data, 0, len);
				201	return mp_obj_str_builder_end(o);
				202	}
				203
				204	int len;
				205	byte *data;
				206	vstr_t *vstr = NULL;
				207	mp_obj_t o = NULL;
				208	// Try to create array of exact len if initializer len is known
				209	mp_obj_t len_in = mp_obj_len_maybe(args[0]);
				210	if (len_in == MP_OBJ_NULL) {
				211	len = -1;
				212	vstr = vstr_new();
				213	} else {
				214	len = MP_OBJ_SMALL_INT_VALUE(len_in);
				215	o = mp_obj_str_builder_start(&mp_type_bytes, len, &data);
				216	}
				217
				218	mp_obj_t iterable = mp_getiter(args[0]);
				219	mp_obj_t item;
				220	while ((item = mp_iternext(iterable)) != MP_OBJ_STOP_ITERATION) {
				221	if (len == -1) {
				222	vstr_add_char(vstr, MP_OBJ_SMALL_INT_VALUE(item));
				223	} else {
				224	*data++ = MP_OBJ_SMALL_INT_VALUE(item);
				225	}
				226	}
				227
				228	if (len == -1) {
				229	vstr_shrink(vstr);
				230	// TODO: Optimize, borrow buffer from vstr
				231	len = vstr_len(vstr);
				232	o = mp_obj_str_builder_start(&mp_type_bytes, len, &data);
				233	memcpy(data, vstr_str(vstr), len);
				234	vstr_free(vstr);
				235	}
				236
				237	return mp_obj_str_builder_end(o);
				238
				239	wrong_args:
				240	nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, "wrong number of arguments"));
				241	}
				242
				243	// like strstr but with specified length and allows \0 bytes
				244	// TODO replace with something more efficient/standard
				245	STATIC const byte find_subbytes(const byte haystack, machine_uint_t hlen, const byte *needle, machine_uint_t nlen, machine_int_t direction) {
				246	if (hlen >= nlen) {
				247	machine_uint_t str_index, str_index_end;
				248	if (direction > 0) {
				249	str_index = 0;
				250	str_index_end = hlen - nlen;
				251	} else {
				252	str_index = hlen - nlen;
				253	str_index_end = 0;
				254	}
				255	for (;;) {
				256	if (memcmp(&haystack[str_index], needle, nlen) == 0) {
				257	//found
				258	return haystack + str_index;
				259	}
				260	if (str_index == str_index_end) {
				261	//not found
				262	break;
				263	}
				264	str_index += direction;
				265	}
				266	}
				267	return NULL;
				268	}
				269
				270	STATIC mp_obj_t str_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) {
				271	GET_STR_DATA_LEN(lhs_in, lhs_data, lhs_len);
				272	mp_obj_type_t *lhs_type = mp_obj_get_type(lhs_in);
				273	mp_obj_type_t *rhs_type = mp_obj_get_type(rhs_in);
				274	switch (op) {
				275	case MP_BINARY_OP_ADD:
				276	case MP_BINARY_OP_INPLACE_ADD:
				277	if (lhs_type == rhs_type) {
				278	// add 2 strings or bytes
				279
				280	GET_STR_DATA_LEN(rhs_in, rhs_data, rhs_len);
				281	int alloc_len = lhs_len + rhs_len;
				282
				283	/* code for making qstr
				284	byte *q_ptr;
				285	byte *val = qstr_build_start(alloc_len, &q_ptr);
				286	memcpy(val, lhs_data, lhs_len);
				287	memcpy(val + lhs_len, rhs_data, rhs_len);
				288	return MP_OBJ_NEW_QSTR(qstr_build_end(q_ptr));
				289	*/
				290
				291	// code for non-qstr
				292	byte *data;
				293	mp_obj_t s = mp_obj_str_builder_start(lhs_type, alloc_len, &data);
				294	memcpy(data, lhs_data, lhs_len);
				295	memcpy(data + lhs_len, rhs_data, rhs_len);
				296	return mp_obj_str_builder_end(s);
				297	}
				298	break;
				299
				300	case MP_BINARY_OP_IN:
				301	/* NOTE `a in b` is `b.__contains__(a)` */
				302	if (lhs_type == rhs_type) {
				303	GET_STR_DATA_LEN(rhs_in, rhs_data, rhs_len);
				304	return MP_BOOL(find_subbytes(lhs_data, lhs_len, rhs_data, rhs_len, 1) != NULL);
				305	}
				306	break;
				307
				308	case MP_BINARY_OP_MULTIPLY: {
				309	if (!MP_OBJ_IS_SMALL_INT(rhs_in)) {
				310	return MP_OBJ_NULL; // op not supported
				311	}
				312	int n = MP_OBJ_SMALL_INT_VALUE(rhs_in);
				313	byte *data;
				314	mp_obj_t s = mp_obj_str_builder_start(lhs_type, lhs_len * n, &data);
				315	mp_seq_multiply(lhs_data, sizeof(*lhs_data), lhs_len, n, data);
				316	return mp_obj_str_builder_end(s);
				317	}
				318
				319	case MP_BINARY_OP_MODULO: {
				320	mp_obj_t *args;
				321	uint n_args;
				322	mp_obj_t dict = MP_OBJ_NULL;
				323	if (MP_OBJ_IS_TYPE(rhs_in, &mp_type_tuple)) {
				324	// TODO: Support tuple subclasses?
				325	mp_obj_tuple_get(rhs_in, &n_args, &args);
				326	} else if (MP_OBJ_IS_TYPE(rhs_in, &mp_type_dict)) {
				327	args = NULL;
				328	n_args = 0;
				329	dict = rhs_in;
				330	} else {
				331	args = &rhs_in;
				332	n_args = 1;
				333	}
				334	return str_modulo_format(lhs_in, n_args, args, dict);
				335	}
				336
				337	//case MP_BINARY_OP_NOT_EQUAL: // This is never passed here
				338	case MP_BINARY_OP_EQUAL: // This will be passed only for bytes, str is dealt with in mp_obj_equal()
				339	case MP_BINARY_OP_LESS:
				340	case MP_BINARY_OP_LESS_EQUAL:
				341	case MP_BINARY_OP_MORE:
				342	case MP_BINARY_OP_MORE_EQUAL:
				343	if (lhs_type == rhs_type) {
				344	GET_STR_DATA_LEN(rhs_in, rhs_data, rhs_len);
				345	return MP_BOOL(mp_seq_cmp_bytes(op, lhs_data, lhs_len, rhs_data, rhs_len));
				346	}
				347	if (lhs_type == &mp_type_bytes) {
				348	mp_buffer_info_t bufinfo;
				349	if (!mp_get_buffer(rhs_in, &bufinfo, MP_BUFFER_READ)) {
				350	goto uncomparable;
				351	}
				352	return MP_BOOL(mp_seq_cmp_bytes(op, lhs_data, lhs_len, bufinfo.buf, bufinfo.len));
				353	}
				354	uncomparable:
				355	if (op == MP_BINARY_OP_EQUAL) {
				356	return mp_const_false;
				357	}
				358	}
				359
				360	return MP_OBJ_NULL; // op not supported
				361	}
				362
Chris Angelico	64b468d	2014-06-04 05:28:12 +1000	[diff] [blame^]	363	// Convert an index into a pointer to its lead byte. Out of bounds indexing will raise IndexError or
				364	// be capped to the first/last character of the string, depending on is_slice.
				365	STATIC const char str_index_to_ptr(const char self_data, uint self_len, mp_obj_t index, bool is_slice) {
				366	machine_int_t i;
				367	// Copied from mp_get_index; I don't want bounds checking, just give me
				368	// the integer as-is. (I can't bounds-check without scanning the whole
				369	// string; an out-of-bounds index will be caught in the loops below.)
				370	if (MP_OBJ_IS_SMALL_INT(index)) {
				371	i = MP_OBJ_SMALL_INT_VALUE(index);
				372	} else if (!mp_obj_get_int_maybe(index, &i)) {
				373	nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_TypeError, "string indices must be integers, not %s", mp_obj_get_type_str(index)));
				374	}
				375	const char s, top = self_data + self_len;
				376	if (i < 0)
				377	{
				378	// Negative indexing is performed by counting from the end of the string.
				379	for (s = top - 1; i; --s) {
				380	if (s < self_data) {
				381	if (is_slice) {
				382	return self_data;
				383	}
				384	nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_IndexError, "string index out of range"));
				385	}
				386	if (!UTF8_IS_CONT(*s)) {
				387	++i;
				388	}
				389	}
				390	++s;
				391	} else if (!i) {
				392	return self_data; // Shortcut - str[0] is its base pointer
				393	} else {
				394	// Positive indexing, correspondingly, counts from the start of the string.
				395	// It's assumed that negative indexing will generally be used with small
				396	// absolute values (eg str[-1], not str[-1000000]), which means it'll be
				397	// more efficient this way.
				398	for (s = self_data; true; ++s) {
				399	if (s >= top) {
				400	if (is_slice) {
				401	return top;
				402	}
				403	nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_IndexError, "string index out of range"));
				404	}
				405	while (UTF8_IS_CONT(*s)) {
				406	++s;
				407	}
				408	if (!i--) {
				409	return s;
				410	}
				411	}
				412	}
				413	return s;
				414	}
				415
Paul Sokolovsky	8386534	2014-06-13 00:51:34 +0300	[diff] [blame]	416	STATIC mp_obj_t str_subscr(mp_obj_t self_in, mp_obj_t index, mp_obj_t value) {
				417	mp_obj_type_t *type = mp_obj_get_type(self_in);
				418	GET_STR_DATA_LEN(self_in, self_data, self_len);
				419	if (value == MP_OBJ_SENTINEL) {
				420	// load
				421	#if MICROPY_PY_BUILTINS_SLICE
				422	if (MP_OBJ_IS_TYPE(index, &mp_type_slice)) {
Chris Angelico	64b468d	2014-06-04 05:28:12 +1000	[diff] [blame^]	423	mp_obj_t ostart, ostop, ostep;
				424	mp_obj_slice_get(index, &ostart, &ostop, &ostep);
				425	if (ostep != mp_const_none && ostep != MP_OBJ_NEW_SMALL_INT(1)) {
Paul Sokolovsky	8386534	2014-06-13 00:51:34 +0300	[diff] [blame]	426	nlr_raise(mp_obj_new_exception_msg(&mp_type_NotImplementedError,
				427	"only slices with step=1 (aka None) are supported"));
				428	}
Chris Angelico	64b468d	2014-06-04 05:28:12 +1000	[diff] [blame^]	429
				430	if (type == &mp_type_bytes) {
				431	machine_int_t start = 0, stop = self_len;
				432	if (ostart != mp_const_none) {
				433	start = MP_OBJ_SMALL_INT_VALUE(ostart);
				434	if (start < 0) {
				435	start = self_len + start;
				436	}
				437	}
				438	if (ostop != mp_const_none) {
				439	stop = MP_OBJ_SMALL_INT_VALUE(ostop);
				440	if (stop < 0) {
				441	stop = self_len + stop;
				442	}
				443	}
				444	return mp_obj_new_str_of_type(type, self_data + start, stop - start);
				445	}
				446	const char pstart, pstop;
				447	if (ostart != mp_const_none) {
				448	pstart = str_index_to_ptr((const char *)self_data, self_len, ostart, true);
				449	} else {
				450	pstart = (const char *)self_data;
				451	}
				452	if (ostop != mp_const_none) {
				453	// pstop will point just after the stop character. This depends on
				454	// the \0 at the end of the string.
				455	pstop = str_index_to_ptr((const char *)self_data, self_len, ostop, true);
				456	} else {
				457	pstop = (const char *)self_data + self_len;
				458	}
				459	if (pstop < pstart) {
				460	return MP_OBJ_NEW_QSTR(MP_QSTR_);
				461	}
				462	return mp_obj_new_str_of_type(type, (const byte *)pstart, pstop - pstart);
Paul Sokolovsky	8386534	2014-06-13 00:51:34 +0300	[diff] [blame]	463	}
				464	#endif
Paul Sokolovsky	8386534	2014-06-13 00:51:34 +0300	[diff] [blame]	465	if (type == &mp_type_bytes) {
Chris Angelico	64b468d	2014-06-04 05:28:12 +1000	[diff] [blame^]	466	uint index_val = mp_get_index(type, self_len, index, false);
Paul Sokolovsky	8386534	2014-06-13 00:51:34 +0300	[diff] [blame]	467	return MP_OBJ_NEW_SMALL_INT((mp_small_int_t)self_data[index_val]);
Paul Sokolovsky	8386534	2014-06-13 00:51:34 +0300	[diff] [blame]	468	}
Chris Angelico	64b468d	2014-06-04 05:28:12 +1000	[diff] [blame^]	469	const char s = str_index_to_ptr((const char )self_data, self_len, index, false);
				470	int len = 1;
				471	if (UTF8_IS_NONASCII(*s)) {
				472	// Count the number of 1 bits (after the first)
				473	for (char mask = 0x40; *s & mask; mask >>= 1) {
				474	++len;
				475	}
				476	}
				477	return mp_obj_new_str(s, len, true); // This will create a one-character string
Paul Sokolovsky	8386534	2014-06-13 00:51:34 +0300	[diff] [blame]	478	} else {
				479	return MP_OBJ_NULL; // op not supported
				480	}
				481	}
				482
				483	STATIC mp_obj_t str_join(mp_obj_t self_in, mp_obj_t arg) {
				484	assert(is_str_or_bytes(self_in));
				485	const mp_obj_type_t *self_type = mp_obj_get_type(self_in);
				486
				487	// get separation string
				488	GET_STR_DATA_LEN(self_in, sep_str, sep_len);
				489
				490	// process args
				491	uint seq_len;
				492	mp_obj_t *seq_items;
				493	if (MP_OBJ_IS_TYPE(arg, &mp_type_tuple)) {
				494	mp_obj_tuple_get(arg, &seq_len, &seq_items);
				495	} else {
				496	if (!MP_OBJ_IS_TYPE(arg, &mp_type_list)) {
				497	// arg is not a list, try to convert it to one
				498	// TODO: Try to optimize?
				499	arg = mp_type_list.make_new((mp_obj_t)&mp_type_list, 1, 0, &arg);
				500	}
				501	mp_obj_list_get(arg, &seq_len, &seq_items);
				502	}
				503
				504	// count required length
				505	int required_len = 0;
				506	for (int i = 0; i < seq_len; i++) {
				507	if (mp_obj_get_type(seq_items[i]) != self_type) {
				508	nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError,
				509	"join expects a list of str/bytes objects consistent with self object"));
				510	}
				511	if (i > 0) {
				512	required_len += sep_len;
				513	}
				514	GET_STR_LEN(seq_items[i], l);
				515	required_len += l;
				516	}
				517
				518	// make joined string
				519	byte *data;
				520	mp_obj_t joined_str = mp_obj_str_builder_start(self_type, required_len, &data);
				521	for (int i = 0; i < seq_len; i++) {
				522	if (i > 0) {
				523	memcpy(data, sep_str, sep_len);
				524	data += sep_len;
				525	}
				526	GET_STR_DATA_LEN(seq_items[i], s, l);
				527	memcpy(data, s, l);
				528	data += l;
				529	}
				530
				531	// return joined string
				532	return mp_obj_str_builder_end(joined_str);
				533	}
				534
				535	#define is_ws(c) ((c) == ' ' \|\| (c) == '\t')
				536
				537	STATIC mp_obj_t str_split(uint n_args, const mp_obj_t *args) {
				538	const mp_obj_type_t *self_type = mp_obj_get_type(args[0]);
				539	machine_int_t splits = -1;
				540	mp_obj_t sep = mp_const_none;
				541	if (n_args > 1) {
				542	sep = args[1];
				543	if (n_args > 2) {
				544	splits = mp_obj_get_int(args[2]);
				545	}
				546	}
				547
				548	mp_obj_t res = mp_obj_new_list(0, NULL);
				549	GET_STR_DATA_LEN(args[0], s, len);
				550	const byte *top = s + len;
				551
				552	if (sep == mp_const_none) {
				553	// sep not given, so separate on whitespace
				554
				555	// Initial whitespace is not counted as split, so we pre-do it
				556	while (s < top && is_ws(*s)) s++;
				557	while (s < top && splits != 0) {
				558	const byte *start = s;
				559	while (s < top && !is_ws(*s)) s++;
				560	mp_obj_list_append(res, mp_obj_new_str_of_type(self_type, start, s - start));
				561	if (s >= top) {
				562	break;
				563	}
				564	while (s < top && is_ws(*s)) s++;
				565	if (splits > 0) {
				566	splits--;
				567	}
				568	}
				569
				570	if (s < top) {
				571	mp_obj_list_append(res, mp_obj_new_str_of_type(self_type, s, top - s));
				572	}
				573
				574	} else {
				575	// sep given
				576
				577	uint sep_len;
				578	const char *sep_str = mp_obj_str_get_data(sep, &sep_len);
				579
				580	if (sep_len == 0) {
				581	nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "empty separator"));
				582	}
				583
				584	for (;;) {
				585	const byte *start = s;
				586	for (;;) {
				587	if (splits == 0 \|\| s + sep_len > top) {
				588	s = top;
				589	break;
				590	} else if (memcmp(s, sep_str, sep_len) == 0) {
				591	break;
				592	}
				593	s++;
				594	}
				595	mp_obj_list_append(res, mp_obj_new_str_of_type(self_type, start, s - start));
				596	if (s >= top) {
				597	break;
				598	}
				599	s += sep_len;
				600	if (splits > 0) {
				601	splits--;
				602	}
				603	}
				604	}
				605
				606	return res;
				607	}
				608
				609	STATIC mp_obj_t str_rsplit(uint n_args, const mp_obj_t *args) {
				610	if (n_args < 3) {
				611	// If we don't have split limit, it doesn't matter from which side
				612	// we split.
				613	return str_split(n_args, args);
				614	}
				615	const mp_obj_type_t *self_type = mp_obj_get_type(args[0]);
				616	mp_obj_t sep = args[1];
				617	GET_STR_DATA_LEN(args[0], s, len);
				618
				619	machine_int_t splits = mp_obj_get_int(args[2]);
				620	machine_int_t org_splits = splits;
				621	// Preallocate list to the max expected # of elements, as we
				622	// will fill it from the end.
				623	mp_obj_list_t *res = mp_obj_new_list(splits + 1, NULL);
				624	int idx = splits;
				625
				626	if (sep == mp_const_none) {
				627	assert(!"TODO: rsplit(None,n) not implemented");
				628	} else {
				629	uint sep_len;
				630	const char *sep_str = mp_obj_str_get_data(sep, &sep_len);
				631
				632	if (sep_len == 0) {
				633	nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "empty separator"));
				634	}
				635
				636	const byte *beg = s;
				637	const byte *last = s + len;
				638	for (;;) {
				639	s = last - sep_len;
				640	for (;;) {
				641	if (splits == 0 \|\| s < beg) {
				642	break;
				643	} else if (memcmp(s, sep_str, sep_len) == 0) {
				644	break;
				645	}
				646	s--;
				647	}
				648	if (s < beg \|\| splits == 0) {
				649	res->items[idx] = mp_obj_new_str_of_type(self_type, beg, last - beg);
				650	break;
				651	}
				652	res->items[idx--] = mp_obj_new_str_of_type(self_type, s + sep_len, last - s - sep_len);
				653	last = s;
				654	if (splits > 0) {
				655	splits--;
				656	}
				657	}
				658	if (idx != 0) {
				659	// We split less parts than split limit, now go cleanup surplus
				660	int used = org_splits + 1 - idx;
				661	memcpy(res->items, &res->items[idx], used * sizeof(mp_obj_t));
				662	mp_seq_clear(res->items, used, res->alloc, sizeof(*res->items));
				663	res->len = used;
				664	}
				665	}
				666
				667	return res;
				668	}
				669
				670
				671	STATIC mp_obj_t str_finder(uint n_args, const mp_obj_t *args, machine_int_t direction, bool is_index) {
				672	assert(2 <= n_args && n_args <= 4);
				673	assert(MP_OBJ_IS_STR(args[0]));
				674	assert(MP_OBJ_IS_STR(args[1]));
				675
				676	GET_STR_DATA_LEN(args[0], haystack, haystack_len);
				677	GET_STR_DATA_LEN(args[1], needle, needle_len);
				678
				679	machine_uint_t start = 0;
				680	machine_uint_t end = haystack_len;
				681	if (n_args >= 3 && args[2] != mp_const_none) {
				682	start = mp_get_index(&mp_type_str, haystack_len, args[2], true);
				683	}
				684	if (n_args >= 4 && args[3] != mp_const_none) {
				685	end = mp_get_index(&mp_type_str, haystack_len, args[3], true);
				686	}
				687
				688	const byte *p = find_subbytes(haystack + start, end - start, needle, needle_len, direction);
				689	if (p == NULL) {
				690	// not found
				691	if (is_index) {
				692	nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "substring not found"));
				693	} else {
				694	return MP_OBJ_NEW_SMALL_INT(-1);
				695	}
				696	} else {
				697	// found
				698	return MP_OBJ_NEW_SMALL_INT(p - haystack);
				699	}
				700	}
				701
				702	STATIC mp_obj_t str_find(uint n_args, const mp_obj_t *args) {
				703	return str_finder(n_args, args, 1, false);
				704	}
				705
				706	STATIC mp_obj_t str_rfind(uint n_args, const mp_obj_t *args) {
				707	return str_finder(n_args, args, -1, false);
				708	}
				709
				710	STATIC mp_obj_t str_index(uint n_args, const mp_obj_t *args) {
				711	return str_finder(n_args, args, 1, true);
				712	}
				713
				714	STATIC mp_obj_t str_rindex(uint n_args, const mp_obj_t *args) {
				715	return str_finder(n_args, args, -1, true);
				716	}
				717
				718	// TODO: (Much) more variety in args
				719	STATIC mp_obj_t str_startswith(uint n_args, const mp_obj_t *args) {
				720	GET_STR_DATA_LEN(args[0], str, str_len);
				721	GET_STR_DATA_LEN(args[1], prefix, prefix_len);
				722	uint index_val = 0;
				723	if (n_args > 2) {
				724	index_val = mp_get_index(&mp_type_str, str_len, args[2], true);
				725	}
				726	if (prefix_len + index_val > str_len) {
				727	return mp_const_false;
				728	}
				729	return MP_BOOL(memcmp(str + index_val, prefix, prefix_len) == 0);
				730	}
				731
				732	STATIC mp_obj_t str_endswith(uint n_args, const mp_obj_t *args) {
				733	GET_STR_DATA_LEN(args[0], str, str_len);
				734	GET_STR_DATA_LEN(args[1], suffix, suffix_len);
				735	assert(n_args == 2);
				736
				737	if (suffix_len > str_len) {
				738	return mp_const_false;
				739	}
				740	return MP_BOOL(memcmp(str + (str_len - suffix_len), suffix, suffix_len) == 0);
				741	}
				742
				743	enum { LSTRIP, RSTRIP, STRIP };
				744
				745	STATIC mp_obj_t str_uni_strip(int type, uint n_args, const mp_obj_t *args) {
				746	assert(1 <= n_args && n_args <= 2);
				747	assert(is_str_or_bytes(args[0]));
				748	const mp_obj_type_t *self_type = mp_obj_get_type(args[0]);
				749
				750	const byte *chars_to_del;
				751	uint chars_to_del_len;
				752	static const byte whitespace[] = " \t\n\r\v\f";
				753
				754	if (n_args == 1) {
				755	chars_to_del = whitespace;
				756	chars_to_del_len = sizeof(whitespace);
				757	} else {
				758	if (mp_obj_get_type(args[1]) != self_type) {
				759	arg_type_mixup();
				760	}
				761	GET_STR_DATA_LEN(args[1], s, l);
				762	chars_to_del = s;
				763	chars_to_del_len = l;
				764	}
				765
				766	GET_STR_DATA_LEN(args[0], orig_str, orig_str_len);
				767
				768	machine_uint_t first_good_char_pos = 0;
				769	bool first_good_char_pos_set = false;
				770	machine_uint_t last_good_char_pos = 0;
				771	machine_uint_t i = 0;
				772	machine_int_t delta = 1;
				773	if (type == RSTRIP) {
				774	i = orig_str_len - 1;
				775	delta = -1;
				776	}
				777	for (machine_uint_t len = orig_str_len; len > 0; len--) {
				778	if (find_subbytes(chars_to_del, chars_to_del_len, &orig_str[i], 1, 1) == NULL) {
				779	if (!first_good_char_pos_set) {
				780	first_good_char_pos_set = true;
				781	first_good_char_pos = i;
				782	if (type == LSTRIP) {
				783	last_good_char_pos = orig_str_len - 1;
				784	break;
				785	} else if (type == RSTRIP) {
				786	first_good_char_pos = 0;
				787	last_good_char_pos = i;
				788	break;
				789	}
				790	}
				791	last_good_char_pos = i;
				792	}
				793	i += delta;
				794	}
				795
				796	if (!first_good_char_pos_set) {
				797	// string is all whitespace, return ''
				798	return MP_OBJ_NEW_QSTR(MP_QSTR_);
				799	}
				800
				801	assert(last_good_char_pos >= first_good_char_pos);
				802	//+1 to accomodate the last character
				803	machine_uint_t stripped_len = last_good_char_pos - first_good_char_pos + 1;
				804	if (stripped_len == orig_str_len) {
				805	// If nothing was stripped, don't bother to dup original string
				806	// TODO: watch out for this case when we'll get to bytearray.strip()
				807	assert(first_good_char_pos == 0);
				808	return args[0];
				809	}
				810	return mp_obj_new_str_of_type(self_type, orig_str + first_good_char_pos, stripped_len);
				811	}
				812
				813	STATIC mp_obj_t str_strip(uint n_args, const mp_obj_t *args) {
				814	return str_uni_strip(STRIP, n_args, args);
				815	}
				816
				817	STATIC mp_obj_t str_lstrip(uint n_args, const mp_obj_t *args) {
				818	return str_uni_strip(LSTRIP, n_args, args);
				819	}
				820
				821	STATIC mp_obj_t str_rstrip(uint n_args, const mp_obj_t *args) {
				822	return str_uni_strip(RSTRIP, n_args, args);
				823	}
				824
				825	// Takes an int arg, but only parses unsigned numbers, and only changes
				826	// *num if at least one digit was parsed.
				827	static int str_to_int(const char str, int num) {
				828	const char *s = str;
				829	if (unichar_isdigit(*s)) {
				830	*num = 0;
				831	do {
				832	num = num * 10 + (*s - '0');
				833	s++;
				834	}
				835	while (unichar_isdigit(*s));
				836	}
				837	return s - str;
				838	}
				839
				840	static bool isalignment(char ch) {
				841	return ch && strchr("<>=^", ch) != NULL;
				842	}
				843
				844	static bool istype(char ch) {
				845	return ch && strchr("bcdeEfFgGnosxX%", ch) != NULL;
				846	}
				847
				848	static bool arg_looks_integer(mp_obj_t arg) {
				849	return MP_OBJ_IS_TYPE(arg, &mp_type_bool) \|\| MP_OBJ_IS_INT(arg);
				850	}
				851
				852	static bool arg_looks_numeric(mp_obj_t arg) {
				853	return arg_looks_integer(arg)
				854	#if MICROPY_PY_BUILTINS_FLOAT
				855	\|\| MP_OBJ_IS_TYPE(arg, &mp_type_float)
				856	#endif
				857	;
				858	}
				859
				860	static mp_obj_t arg_as_int(mp_obj_t arg) {
				861	#if MICROPY_PY_BUILTINS_FLOAT
				862	if (MP_OBJ_IS_TYPE(arg, &mp_type_float)) {
				863
				864	// TODO: Needs a way to construct an mpz integer from a float
				865
				866	mp_small_int_t num = mp_obj_get_float(arg);
				867	return MP_OBJ_NEW_SMALL_INT(num);
				868	}
				869	#endif
				870	return arg;
				871	}
				872
				873	mp_obj_t mp_obj_str_format(uint n_args, const mp_obj_t *args) {
				874	assert(MP_OBJ_IS_STR(args[0]));
				875
				876	GET_STR_DATA_LEN(args[0], str, len);
				877	int arg_i = 0;
				878	vstr_t *vstr = vstr_new();
				879	pfenv_t pfenv_vstr;
				880	pfenv_vstr.data = vstr;
				881	pfenv_vstr.print_strn = pfenv_vstr_add_strn;
				882
				883	for (const byte *top = str + len; str < top; str++) {
				884	if (*str == '}') {
				885	str++;
				886	if (str < top && *str == '}') {
				887	vstr_add_char(vstr, '}');
				888	continue;
				889	}
				890	nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "single '}' encountered in format string"));
				891	}
				892	if (*str != '{') {
				893	vstr_add_char(vstr, *str);
				894	continue;
				895	}
				896
				897	str++;
				898	if (str < top && *str == '{') {
				899	vstr_add_char(vstr, '{');
				900	continue;
				901	}
				902
				903	// replacement_field ::= "{" [field_name] ["!" conversion] [":" format_spec] "}"
				904
				905	vstr_t *field_name = NULL;
				906	char conversion = '\0';
				907	vstr_t *format_spec = NULL;
				908
				909	if (str < top && str != '}' && str != '!' && *str != ':') {
				910	field_name = vstr_new();
				911	while (str < top && str != '}' && str != '!' && *str != ':') {
				912	vstr_add_char(field_name, *str++);
				913	}
				914	vstr_add_char(field_name, '\0');
				915	}
				916
				917	// conversion ::= "r" \| "s"
				918
				919	if (str < top && *str == '!') {
				920	str++;
				921	if (str < top && (str == 'r' \|\| str == 's')) {
				922	conversion = *str++;
				923	} else {
				924	nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "end of format while looking for conversion specifier"));
				925	}
				926	}
				927
				928	if (str < top && *str == ':') {
				929	str++;
				930	// {:} is the same as {}, which is the same as {!s}
				931	// This makes a difference when passing in a True or False
				932	// '{}'.format(True) returns 'True'
				933	// '{:d}'.format(True) returns '1'
				934	// So we treat {:} as {} and this later gets treated to be {!s}
				935	if (*str != '}') {
				936	format_spec = vstr_new();
				937	while (str < top && *str != '}') {
				938	vstr_add_char(format_spec, *str++);
				939	}
				940	vstr_add_char(format_spec, '\0');
				941	}
				942	}
				943	if (str >= top) {
				944	nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "unmatched '{' in format"));
				945	}
				946	if (*str != '}') {
				947	nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "expected ':' after format specifier"));
				948	}
				949
				950	mp_obj_t arg = mp_const_none;
				951
				952	if (field_name) {
				953	if (arg_i > 0) {
				954	nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "can't switch from automatic field numbering to manual field specification"));
				955	}
				956	int index = 0;
				957	if (str_to_int(vstr_str(field_name), &index) != vstr_len(field_name) - 1) {
				958	nlr_raise(mp_obj_new_exception_msg(&mp_type_KeyError, "attributes not supported yet"));
				959	}
				960	if (index >= n_args - 1) {
				961	nlr_raise(mp_obj_new_exception_msg(&mp_type_IndexError, "tuple index out of range"));
				962	}
				963	arg = args[index + 1];
				964	arg_i = -1;
				965	vstr_free(field_name);
				966	field_name = NULL;
				967	} else {
				968	if (arg_i < 0) {
				969	nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "can't switch from manual field specification to automatic field numbering"));
				970	}
				971	if (arg_i >= n_args - 1) {
				972	nlr_raise(mp_obj_new_exception_msg(&mp_type_IndexError, "tuple index out of range"));
				973	}
				974	arg = args[arg_i + 1];
				975	arg_i++;
				976	}
				977	if (!format_spec && !conversion) {
				978	conversion = 's';
				979	}
				980	if (conversion) {
				981	mp_print_kind_t print_kind;
				982	if (conversion == 's') {
				983	print_kind = PRINT_STR;
				984	} else if (conversion == 'r') {
				985	print_kind = PRINT_REPR;
				986	} else {
				987	nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_ValueError, "unknown conversion specifier %c", conversion));
				988	}
				989	vstr_t *arg_vstr = vstr_new();
				990	mp_obj_print_helper((void ()(void, const char*, ...))vstr_printf, arg_vstr, arg, print_kind);
				991	arg = mp_obj_new_str(vstr_str(arg_vstr), vstr_len(arg_vstr), false);
				992	vstr_free(arg_vstr);
				993	}
				994
				995	char sign = '\0';
				996	char fill = '\0';
				997	char align = '\0';
				998	int width = -1;
				999	int precision = -1;
				1000	char type = '\0';
				1001	int flags = 0;
				1002
				1003	if (format_spec) {
				1004	// The format specifier (from http://docs.python.org/2/library/string.html#formatspec)
				1005	//
				1006	// [[fill]align][sign][#][0][width][,][.precision][type]
				1007	// fill ::= <any character>
				1008	// align ::= "<" \| ">" \| "=" \| "^"
				1009	// sign ::= "+" \| "-" \| " "
				1010	// width ::= integer
				1011	// precision ::= integer
				1012	// type ::= "b" \| "c" \| "d" \| "e" \| "E" \| "f" \| "F" \| "g" \| "G" \| "n" \| "o" \| "s" \| "x" \| "X" \| "%"
				1013
				1014	const char *s = vstr_str(format_spec);
				1015	if (isalignment(*s)) {
				1016	align = *s++;
				1017	} else if (*s && isalignment(s[1])) {
				1018	fill = *s++;
				1019	align = *s++;
				1020	}
				1021	if (s == '+' \|\| s == '-' \|\| *s == ' ') {
				1022	if (*s == '+') {
				1023	flags \|= PF_FLAG_SHOW_SIGN;
				1024	} else if (*s == ' ') {
				1025	flags \|= PF_FLAG_SPACE_SIGN;
				1026	}
				1027	sign = *s++;
				1028	}
				1029	if (*s == '#') {
				1030	flags \|= PF_FLAG_SHOW_PREFIX;
				1031	s++;
				1032	}
				1033	if (*s == '0') {
				1034	if (!align) {
				1035	align = '=';
				1036	}
				1037	if (!fill) {
				1038	fill = '0';
				1039	}
				1040	}
				1041	s += str_to_int(s, &width);
				1042	if (*s == ',') {
				1043	flags \|= PF_FLAG_SHOW_COMMA;
				1044	s++;
				1045	}
				1046	if (*s == '.') {
				1047	s++;
				1048	s += str_to_int(s, &precision);
				1049	}
				1050	if (istype(*s)) {
				1051	type = *s++;
				1052	}
				1053	if (*s) {
				1054	nlr_raise(mp_obj_new_exception_msg(&mp_type_KeyError, "Invalid conversion specification"));
				1055	}
				1056	vstr_free(format_spec);
				1057	format_spec = NULL;
				1058	}
				1059	if (!align) {
				1060	if (arg_looks_numeric(arg)) {
				1061	align = '>';
				1062	} else {
				1063	align = '<';
				1064	}
				1065	}
				1066	if (!fill) {
				1067	fill = ' ';
				1068	}
				1069
				1070	if (sign) {
				1071	if (type == 's') {
				1072	nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "Sign not allowed in string format specifier"));
				1073	}
				1074	if (type == 'c') {
				1075	nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "Sign not allowed with integer format specifier 'c'"));
				1076	}
				1077	} else {
				1078	sign = '-';
				1079	}
				1080
				1081	switch (align) {
				1082	case '<': flags \|= PF_FLAG_LEFT_ADJUST; break;
				1083	case '=': flags \|= PF_FLAG_PAD_AFTER_SIGN; break;
				1084	case '^': flags \|= PF_FLAG_CENTER_ADJUST; break;
				1085	}
				1086
				1087	if (arg_looks_integer(arg)) {
				1088	switch (type) {
				1089	case 'b':
				1090	pfenv_print_mp_int(&pfenv_vstr, arg, 1, 2, 'a', flags, fill, width, 0);
				1091	continue;
				1092
				1093	case 'c':
				1094	{
				1095	char ch = mp_obj_get_int(arg);
				1096	pfenv_print_strn(&pfenv_vstr, &ch, 1, flags, fill, width);
				1097	continue;
				1098	}
				1099
				1100	case '\0': // No explicit format type implies 'd'
				1101	case 'n': // I don't think we support locales in uPy so use 'd'
				1102	case 'd':
				1103	pfenv_print_mp_int(&pfenv_vstr, arg, 1, 10, 'a', flags, fill, width, 0);
				1104	continue;
				1105
				1106	case 'o':
				1107	if (flags & PF_FLAG_SHOW_PREFIX) {
				1108	flags \|= PF_FLAG_SHOW_OCTAL_LETTER;
				1109	}
				1110
				1111	pfenv_print_mp_int(&pfenv_vstr, arg, 1, 8, 'a', flags, fill, width, 0);
				1112	continue;
				1113
				1114	case 'X':
				1115	case 'x':
				1116	pfenv_print_mp_int(&pfenv_vstr, arg, 1, 16, type - ('X' - 'A'), flags, fill, width, 0);
				1117	continue;
				1118
				1119	case 'e':
				1120	case 'E':
				1121	case 'f':
				1122	case 'F':
				1123	case 'g':
				1124	case 'G':
				1125	case '%':
				1126	// The floating point formatters all work with anything that
				1127	// looks like an integer
				1128	break;
				1129
				1130	default:
				1131	nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_ValueError,
				1132	"unknown format code '%c' for object of type '%s'", type, mp_obj_get_type_str(arg)));
				1133	}
				1134	}
				1135
				1136	// NOTE: no else here. We need the e, f, g etc formats for integer
				1137	// arguments (from above if) to take this if.
				1138	if (arg_looks_numeric(arg)) {
				1139	if (!type) {
				1140
				1141	// Even though the docs say that an unspecified type is the same
				1142	// as 'g', there is one subtle difference, when the exponent
				1143	// is one less than the precision.
				1144	//
				1145	// '{:10.1}'.format(0.0) ==> '0e+00'
				1146	// '{:10.1g}'.format(0.0) ==> '0'
				1147	//
				1148	// TODO: Figure out how to deal with this.
				1149	//
				1150	// A proper solution would involve adding a special flag
				1151	// or something to format_float, and create a format_double
				1152	// to deal with doubles. In order to fix this when using
				1153	// sprintf, we'd need to use the e format and tweak the
				1154	// returned result to strip trailing zeros like the g format
				1155	// does.
				1156	//
				1157	// {:10.3} and {:10.2e} with 1.23e2 both produce 1.23e+02
				1158	// but with 1.e2 you get 1e+02 and 1.00e+02
				1159	//
				1160	// Stripping the trailing 0's (like g) does would make the
				1161	// e format give us the right format.
				1162	//
				1163	// CPython sources say:
				1164	// Omitted type specifier. Behaves in the same way as repr(x)
				1165	// and str(x) if no precision is given, else like 'g', but with
				1166	// at least one digit after the decimal point. */
				1167
				1168	type = 'g';
				1169	}
				1170	if (type == 'n') {
				1171	type = 'g';
				1172	}
				1173
				1174	flags \|= PF_FLAG_PAD_NAN_INF; // '{:06e}'.format(float('-inf')) should give '-00inf'
				1175	switch (type) {
				1176	#if MICROPY_PY_BUILTINS_FLOAT
				1177	case 'e':
				1178	case 'E':
				1179	case 'f':
				1180	case 'F':
				1181	case 'g':
				1182	case 'G':
				1183	pfenv_print_float(&pfenv_vstr, mp_obj_get_float(arg), type, flags, fill, width, precision);
				1184	break;
				1185
				1186	case '%':
				1187	flags \|= PF_FLAG_ADD_PERCENT;
				1188	pfenv_print_float(&pfenv_vstr, mp_obj_get_float(arg) * 100.0F, 'f', flags, fill, width, precision);
				1189	break;
				1190	#endif
				1191
				1192	default:
				1193	nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_ValueError,
				1194	"unknown format code '%c' for object of type 'float'",
				1195	type, mp_obj_get_type_str(arg)));
				1196	}
				1197	} else {
				1198	// arg doesn't look like a number
				1199
				1200	if (align == '=') {
				1201	nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "'=' alignment not allowed in string format specifier"));
				1202	}
				1203
				1204	switch (type) {
				1205	case '\0':
				1206	mp_obj_print_helper((void ()(void, const char*, ...))vstr_printf, vstr, arg, PRINT_STR);
				1207	break;
				1208
				1209	case 's':
				1210	{
				1211	uint len;
				1212	const char *s = mp_obj_str_get_data(arg, &len);
				1213	if (precision < 0) {
				1214	precision = len;
				1215	}
				1216	if (len > precision) {
				1217	len = precision;
				1218	}
				1219	pfenv_print_strn(&pfenv_vstr, s, len, flags, fill, width);
				1220	break;
				1221	}
				1222
				1223	default:
				1224	nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_ValueError,
				1225	"unknown format code '%c' for object of type 'str'",
				1226	type, mp_obj_get_type_str(arg)));
				1227	}
				1228	}
				1229	}
				1230
				1231	mp_obj_t s = mp_obj_new_str(vstr->buf, vstr->len, false);
				1232	vstr_free(vstr);
				1233	return s;
				1234	}
				1235
				1236	STATIC mp_obj_t str_modulo_format(mp_obj_t pattern, uint n_args, const mp_obj_t *args, mp_obj_t dict) {
				1237	assert(MP_OBJ_IS_STR(pattern));
				1238
				1239	GET_STR_DATA_LEN(pattern, str, len);
				1240	const byte *start_str = str;
				1241	int arg_i = 0;
				1242	vstr_t *vstr = vstr_new();
				1243	pfenv_t pfenv_vstr;
				1244	pfenv_vstr.data = vstr;
				1245	pfenv_vstr.print_strn = pfenv_vstr_add_strn;
				1246
				1247	for (const byte *top = str + len; str < top; str++) {
				1248	mp_obj_t arg = MP_OBJ_NULL;
				1249	if (*str != '%') {
				1250	vstr_add_char(vstr, *str);
				1251	continue;
				1252	}
				1253	if (++str >= top) {
				1254	break;
				1255	}
				1256	if (*str == '%') {
				1257	vstr_add_char(vstr, '%');
				1258	continue;
				1259	}
				1260
				1261	// Dictionary value lookup
				1262	if (*str == '(') {
				1263	const byte *key = ++str;
				1264	while (*str != ')') {
				1265	if (str >= top) {
				1266	nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "incomplete format key"));
				1267	}
				1268	++str;
				1269	}
				1270	mp_obj_t k_obj = mp_obj_new_str((const char*)key, str - key, true);
				1271	arg = mp_obj_dict_get(dict, k_obj);
				1272	str++;
				1273	}
				1274
				1275	int flags = 0;
				1276	char fill = ' ';
				1277	int alt = 0;
				1278	while (str < top) {
				1279	if (*str == '-') flags \|= PF_FLAG_LEFT_ADJUST;
				1280	else if (*str == '+') flags \|= PF_FLAG_SHOW_SIGN;
				1281	else if (*str == ' ') flags \|= PF_FLAG_SPACE_SIGN;
				1282	else if (*str == '#') alt = PF_FLAG_SHOW_PREFIX;
				1283	else if (*str == '0') {
				1284	flags \|= PF_FLAG_PAD_AFTER_SIGN;
				1285	fill = '0';
				1286	} else break;
				1287	str++;
				1288	}
				1289	// parse width, if it exists
				1290	int width = 0;
				1291	if (str < top) {
				1292	if (str == '') {
				1293	if (arg_i >= n_args) {
				1294	goto not_enough_args;
				1295	}
				1296	width = mp_obj_get_int(args[arg_i++]);
				1297	str++;
				1298	} else {
				1299	for (; str < top && '0' <= str && str <= '9'; str++) {
				1300	width = width * 10 + *str - '0';
				1301	}
				1302	}
				1303	}
				1304	int prec = -1;
				1305	if (str < top && *str == '.') {
				1306	if (++str < top) {
				1307	if (str == '') {
				1308	if (arg_i >= n_args) {
				1309	goto not_enough_args;
				1310	}
				1311	prec = mp_obj_get_int(args[arg_i++]);
				1312	str++;
				1313	} else {
				1314	prec = 0;
				1315	for (; str < top && '0' <= str && str <= '9'; str++) {
				1316	prec = prec * 10 + *str - '0';
				1317	}
				1318	}
				1319	}
				1320	}
				1321
				1322	if (str >= top) {
				1323	nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "incomplete format"));
				1324	}
				1325
				1326	// Tuple value lookup
				1327	if (arg == MP_OBJ_NULL) {
				1328	if (arg_i >= n_args) {
				1329	not_enough_args:
				1330	nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, "not enough arguments for format string"));
				1331	}
				1332	arg = args[arg_i++];
				1333	}
				1334	switch (*str) {
				1335	case 'c':
				1336	if (MP_OBJ_IS_STR(arg)) {
				1337	uint len;
				1338	const char *s = mp_obj_str_get_data(arg, &len);
				1339	if (len != 1) {
				1340	nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, "%%c requires int or char"));
				1341	break;
				1342	}
				1343	pfenv_print_strn(&pfenv_vstr, s, 1, flags, ' ', width);
				1344	break;
				1345	}
				1346	if (arg_looks_integer(arg)) {
				1347	char ch = mp_obj_get_int(arg);
				1348	pfenv_print_strn(&pfenv_vstr, &ch, 1, flags, ' ', width);
				1349	break;
				1350	}
				1351	#if MICROPY_PY_BUILTINS_FLOAT
				1352	// This is what CPython reports, so we report the same.
				1353	if (MP_OBJ_IS_TYPE(arg, &mp_type_float)) {
				1354	nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, "integer argument expected, got float"));
				1355
				1356	}
				1357	#endif
				1358	nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, "an integer is required"));
				1359	break;
				1360
				1361	case 'd':
				1362	case 'i':
				1363	case 'u':
				1364	pfenv_print_mp_int(&pfenv_vstr, arg_as_int(arg), 1, 10, 'a', flags, fill, width, prec);
				1365	break;
				1366
				1367	#if MICROPY_PY_BUILTINS_FLOAT
				1368	case 'e':
				1369	case 'E':
				1370	case 'f':
				1371	case 'F':
				1372	case 'g':
				1373	case 'G':
				1374	pfenv_print_float(&pfenv_vstr, mp_obj_get_float(arg), *str, flags, fill, width, prec);
				1375	break;
				1376	#endif
				1377
				1378	case 'o':
				1379	if (alt) {
				1380	flags \|= (PF_FLAG_SHOW_PREFIX \| PF_FLAG_SHOW_OCTAL_LETTER);
				1381	}
				1382	pfenv_print_mp_int(&pfenv_vstr, arg, 1, 8, 'a', flags, fill, width, prec);
				1383	break;
				1384
				1385	case 'r':
				1386	case 's':
				1387	{
				1388	vstr_t *arg_vstr = vstr_new();
				1389	mp_obj_print_helper((void ()(void, const char*, ...))vstr_printf,
				1390	arg_vstr, arg, *str == 'r' ? PRINT_REPR : PRINT_STR);
				1391	uint len = vstr_len(arg_vstr);
				1392	if (prec < 0) {
				1393	prec = len;
				1394	}
				1395	if (len > prec) {
				1396	len = prec;
				1397	}
				1398	pfenv_print_strn(&pfenv_vstr, vstr_str(arg_vstr), len, flags, ' ', width);
				1399	vstr_free(arg_vstr);
				1400	break;
				1401	}
				1402
				1403	case 'X':
				1404	case 'x':
				1405	pfenv_print_mp_int(&pfenv_vstr, arg, 1, 16, *str - ('X' - 'A'), flags \| alt, fill, width, prec);
				1406	break;
				1407
				1408	default:
				1409	nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_ValueError,
				1410	"unsupported format character '%c' (0x%x) at index %d",
				1411	str, str, str - start_str));
				1412	}
				1413	}
				1414
				1415	if (arg_i != n_args) {
				1416	nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, "not all arguments converted during string formatting"));
				1417	}
				1418
				1419	mp_obj_t s = mp_obj_new_str(vstr->buf, vstr->len, false);
				1420	vstr_free(vstr);
				1421	return s;
				1422	}
				1423
				1424	STATIC mp_obj_t str_replace(uint n_args, const mp_obj_t *args) {
				1425	assert(MP_OBJ_IS_STR(args[0]));
				1426
				1427	machine_int_t max_rep = -1;
				1428	if (n_args == 4) {
				1429	max_rep = mp_obj_get_int(args[3]);
				1430	if (max_rep == 0) {
				1431	return args[0];
				1432	} else if (max_rep < 0) {
				1433	max_rep = -1;
				1434	}
				1435	}
				1436
				1437	// if max_rep is still -1 by this point we will need to do all possible replacements
				1438
				1439	// check argument types
				1440
				1441	if (!MP_OBJ_IS_STR(args[1])) {
				1442	bad_implicit_conversion(args[1]);
				1443	}
				1444
				1445	if (!MP_OBJ_IS_STR(args[2])) {
				1446	bad_implicit_conversion(args[2]);
				1447	}
				1448
				1449	// extract string data
				1450
				1451	GET_STR_DATA_LEN(args[0], str, str_len);
				1452	GET_STR_DATA_LEN(args[1], old, old_len);
				1453	GET_STR_DATA_LEN(args[2], new, new_len);
				1454
				1455	// old won't exist in str if it's longer, so nothing to replace
				1456	if (old_len > str_len) {
				1457	return args[0];
				1458	}
				1459
				1460	// data for the replaced string
				1461	byte *data = NULL;
				1462	mp_obj_t replaced_str = MP_OBJ_NULL;
				1463
				1464	// do 2 passes over the string:
				1465	// first pass computes the required length of the replaced string
				1466	// second pass does the replacements
				1467	for (;;) {
				1468	machine_uint_t replaced_str_index = 0;
				1469	machine_uint_t num_replacements_done = 0;
				1470	const byte *old_occurrence;
				1471	const byte *offset_ptr = str;
				1472	machine_uint_t str_len_remain = str_len;
				1473	if (old_len == 0) {
				1474	// if old_str is empty, copy new_str to start of replaced string
				1475	// copy the replacement string
				1476	if (data != NULL) {
				1477	memcpy(data, new, new_len);
				1478	}
				1479	replaced_str_index += new_len;
				1480	num_replacements_done++;
				1481	}
				1482	while (num_replacements_done != max_rep && str_len_remain > 0 && (old_occurrence = find_subbytes(offset_ptr, str_len_remain, old, old_len, 1)) != NULL) {
				1483	if (old_len == 0) {
				1484	old_occurrence += 1;
				1485	}
				1486	// copy from just after end of last occurrence of to-be-replaced string to right before start of next occurrence
				1487	if (data != NULL) {
				1488	memcpy(data + replaced_str_index, offset_ptr, old_occurrence - offset_ptr);
				1489	}
				1490	replaced_str_index += old_occurrence - offset_ptr;
				1491	// copy the replacement string
				1492	if (data != NULL) {
				1493	memcpy(data + replaced_str_index, new, new_len);
				1494	}
				1495	replaced_str_index += new_len;
				1496	offset_ptr = old_occurrence + old_len;
				1497	str_len_remain = str + str_len - offset_ptr;
				1498	num_replacements_done++;
				1499	}
				1500
				1501	// copy from just after end of last occurrence of to-be-replaced string to end of old string
				1502	if (data != NULL) {
				1503	memcpy(data + replaced_str_index, offset_ptr, str_len_remain);
				1504	}
				1505	replaced_str_index += str_len_remain;
				1506
				1507	if (data == NULL) {
				1508	// first pass
				1509	if (num_replacements_done == 0) {
				1510	// no substr found, return original string
				1511	return args[0];
				1512	} else {
				1513	// substr found, allocate new string
				1514	replaced_str = mp_obj_str_builder_start(mp_obj_get_type(args[0]), replaced_str_index, &data);
				1515	assert(data != NULL);
				1516	}
				1517	} else {
				1518	// second pass, we are done
				1519	break;
				1520	}
				1521	}
				1522
				1523	return mp_obj_str_builder_end(replaced_str);
				1524	}
				1525
				1526	STATIC mp_obj_t str_count(uint n_args, const mp_obj_t *args) {
				1527	assert(2 <= n_args && n_args <= 4);
				1528	assert(MP_OBJ_IS_STR(args[0]));
				1529	assert(MP_OBJ_IS_STR(args[1]));
				1530
				1531	GET_STR_DATA_LEN(args[0], haystack, haystack_len);
				1532	GET_STR_DATA_LEN(args[1], needle, needle_len);
				1533
				1534	machine_uint_t start = 0;
				1535	machine_uint_t end = haystack_len;
				1536	if (n_args >= 3 && args[2] != mp_const_none) {
				1537	start = mp_get_index(&mp_type_str, haystack_len, args[2], true);
				1538	}
				1539	if (n_args >= 4 && args[3] != mp_const_none) {
				1540	end = mp_get_index(&mp_type_str, haystack_len, args[3], true);
				1541	}
				1542
				1543	// if needle_len is zero then we count each gap between characters as an occurrence
				1544	if (needle_len == 0) {
				1545	return MP_OBJ_NEW_SMALL_INT(end - start + 1);
				1546	}
				1547
				1548	// count the occurrences
				1549	machine_int_t num_occurrences = 0;
				1550	for (machine_uint_t haystack_index = start; haystack_index + needle_len <= end; haystack_index++) {
				1551	if (memcmp(&haystack[haystack_index], needle, needle_len) == 0) {
				1552	num_occurrences++;
				1553	haystack_index += needle_len - 1;
				1554	}
				1555	}
				1556
				1557	return MP_OBJ_NEW_SMALL_INT(num_occurrences);
				1558	}
				1559
				1560	STATIC mp_obj_t str_partitioner(mp_obj_t self_in, mp_obj_t arg, machine_int_t direction) {
				1561	if (!is_str_or_bytes(self_in)) {
				1562	assert(0);
				1563	}
				1564	mp_obj_type_t *self_type = mp_obj_get_type(self_in);
				1565	if (self_type != mp_obj_get_type(arg)) {
				1566	arg_type_mixup();
				1567	}
				1568
				1569	GET_STR_DATA_LEN(self_in, str, str_len);
				1570	GET_STR_DATA_LEN(arg, sep, sep_len);
				1571
				1572	if (sep_len == 0) {
				1573	nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "empty separator"));
				1574	}
				1575
				1576	mp_obj_t result[] = {MP_OBJ_NEW_QSTR(MP_QSTR_), MP_OBJ_NEW_QSTR(MP_QSTR_), MP_OBJ_NEW_QSTR(MP_QSTR_)};
				1577
				1578	if (direction > 0) {
				1579	result[0] = self_in;
				1580	} else {
				1581	result[2] = self_in;
				1582	}
				1583
				1584	const byte *position_ptr = find_subbytes(str, str_len, sep, sep_len, direction);
				1585	if (position_ptr != NULL) {
				1586	machine_uint_t position = position_ptr - str;
				1587	result[0] = mp_obj_new_str_of_type(self_type, str, position);
				1588	result[1] = arg;
				1589	result[2] = mp_obj_new_str_of_type(self_type, str + position + sep_len, str_len - position - sep_len);
				1590	}
				1591
				1592	return mp_obj_new_tuple(3, result);
				1593	}
				1594
				1595	STATIC mp_obj_t str_partition(mp_obj_t self_in, mp_obj_t arg) {
				1596	return str_partitioner(self_in, arg, 1);
				1597	}
				1598
				1599	STATIC mp_obj_t str_rpartition(mp_obj_t self_in, mp_obj_t arg) {
				1600	return str_partitioner(self_in, arg, -1);
				1601	}
				1602
				1603	// Supposedly not too critical operations, so optimize for code size
				1604	STATIC mp_obj_t str_caseconv(unichar (*op)(unichar), mp_obj_t self_in) {
				1605	GET_STR_DATA_LEN(self_in, self_data, self_len);
				1606	byte *data;
				1607	mp_obj_t s = mp_obj_str_builder_start(mp_obj_get_type(self_in), self_len, &data);
				1608	for (int i = 0; i < self_len; i++) {
				1609	data++ = op(self_data++);
				1610	}
				1611	*data = 0;
				1612	return mp_obj_str_builder_end(s);
				1613	}
				1614
				1615	STATIC mp_obj_t str_lower(mp_obj_t self_in) {
				1616	return str_caseconv(unichar_tolower, self_in);
				1617	}
				1618
				1619	STATIC mp_obj_t str_upper(mp_obj_t self_in) {
				1620	return str_caseconv(unichar_toupper, self_in);
				1621	}
				1622
				1623	STATIC mp_obj_t str_uni_istype(bool (*f)(unichar), mp_obj_t self_in) {
				1624	GET_STR_DATA_LEN(self_in, self_data, self_len);
				1625
				1626	if (self_len == 0) {
				1627	return mp_const_false; // default to False for empty str
				1628	}
				1629
				1630	if (f != unichar_isupper && f != unichar_islower) {
				1631	for (int i = 0; i < self_len; i++) {
				1632	if (!f(*self_data++)) {
				1633	return mp_const_false;
				1634	}
				1635	}
				1636	} else {
				1637	bool contains_alpha = false;
				1638
				1639	for (int i = 0; i < self_len; i++) { // only check alphanumeric characters
				1640	if (unichar_isalpha(*self_data++)) {
				1641	contains_alpha = true;
				1642	if (!f(*(self_data - 1))) { // -1 because we already incremented above
				1643	return mp_const_false;
				1644	}
				1645	}
				1646	}
				1647
				1648	if (!contains_alpha) {
				1649	return mp_const_false;
				1650	}
				1651	}
				1652
				1653	return mp_const_true;
				1654	}
				1655
				1656	STATIC mp_obj_t str_isspace(mp_obj_t self_in) {
				1657	return str_uni_istype(unichar_isspace, self_in);
				1658	}
				1659
				1660	STATIC mp_obj_t str_isalpha(mp_obj_t self_in) {
				1661	return str_uni_istype(unichar_isalpha, self_in);
				1662	}
				1663
				1664	STATIC mp_obj_t str_isdigit(mp_obj_t self_in) {
				1665	return str_uni_istype(unichar_isdigit, self_in);
				1666	}
				1667
				1668	STATIC mp_obj_t str_isupper(mp_obj_t self_in) {
				1669	return str_uni_istype(unichar_isupper, self_in);
				1670	}
				1671
				1672	STATIC mp_obj_t str_islower(mp_obj_t self_in) {
				1673	return str_uni_istype(unichar_islower, self_in);
				1674	}
				1675
				1676	#if MICROPY_CPYTHON_COMPAT
				1677	// These methods are superfluous in the presence of str() and bytes()
				1678	// constructors.
				1679	// TODO: should accept kwargs too
				1680	STATIC mp_obj_t bytes_decode(uint n_args, const mp_obj_t *args) {
				1681	mp_obj_t new_args[2];
				1682	if (n_args == 1) {
				1683	new_args[0] = args[0];
				1684	new_args[1] = MP_OBJ_NEW_QSTR(MP_QSTR_utf_hyphen_8);
				1685	args = new_args;
				1686	n_args++;
				1687	}
				1688	return str_make_new(NULL, n_args, 0, args);
				1689	}
				1690
				1691	// TODO: should accept kwargs too
				1692	STATIC mp_obj_t str_encode(uint n_args, const mp_obj_t *args) {
				1693	mp_obj_t new_args[2];
				1694	if (n_args == 1) {
				1695	new_args[0] = args[0];
				1696	new_args[1] = MP_OBJ_NEW_QSTR(MP_QSTR_utf_hyphen_8);
				1697	args = new_args;
				1698	n_args++;
				1699	}
				1700	return bytes_make_new(NULL, n_args, 0, args);
				1701	}
				1702	#endif
				1703
				1704	STATIC machine_int_t str_get_buffer(mp_obj_t self_in, mp_buffer_info_t *bufinfo, int flags) {
				1705	if (flags == MP_BUFFER_READ) {
				1706	GET_STR_DATA_LEN(self_in, str_data, str_len);
				1707	bufinfo->buf = (void*)str_data;
				1708	bufinfo->len = str_len;
				1709	bufinfo->typecode = 'b';
				1710	return 0;
				1711	} else {
				1712	// can't write to a string
				1713	bufinfo->buf = NULL;
				1714	bufinfo->len = 0;
				1715	bufinfo->typecode = -1;
				1716	return 1;
				1717	}
				1718	}
				1719
					// Function objects wrapping the C implementations of the string methods;
					// they are referenced from str_locals_dict_table.
					// The numbers given to the VAR_BETWEEN / VAR variants presumably are the
					// minimum (and maximum) accepted argument counts, self included - e.g.
					// str_count asserts 2 <= n_args <= 4 and is declared with (2, 4) here.
					// TODO confirm against the macro definitions.
					// decode/encode are only available when MICROPY_CPYTHON_COMPAT is enabled.
				1720	#if MICROPY_CPYTHON_COMPAT
				1721	STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(bytes_decode_obj, 1, 3, bytes_decode);
				1722	STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_encode_obj, 1, 3, str_encode);
				1723	#endif
				1724	STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_find_obj, 2, 4, str_find);
				1725	STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_rfind_obj, 2, 4, str_rfind);
				1726	STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_index_obj, 2, 4, str_index);
				1727	STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_rindex_obj, 2, 4, str_rindex);
				1728	STATIC MP_DEFINE_CONST_FUN_OBJ_2(str_join_obj, str_join);
				1729	STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_split_obj, 1, 3, str_split);
				1730	STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_rsplit_obj, 1, 3, str_rsplit);
				1731	STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_startswith_obj, 2, 3, str_startswith);
				1732	STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_endswith_obj, 2, 3, str_endswith);
				1733	STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_strip_obj, 1, 2, str_strip);
				1734	STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_lstrip_obj, 1, 2, str_lstrip);
				1735	STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_rstrip_obj, 1, 2, str_rstrip);
				1736	STATIC MP_DEFINE_CONST_FUN_OBJ_VAR(str_format_obj, 1, mp_obj_str_format);
				1737	STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_replace_obj, 3, 4, str_replace);
				1738	STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_count_obj, 2, 4, str_count);
				1739	STATIC MP_DEFINE_CONST_FUN_OBJ_2(str_partition_obj, str_partition);
				1740	STATIC MP_DEFINE_CONST_FUN_OBJ_2(str_rpartition_obj, str_rpartition);
				1741	STATIC MP_DEFINE_CONST_FUN_OBJ_1(str_lower_obj, str_lower);
				1742	STATIC MP_DEFINE_CONST_FUN_OBJ_1(str_upper_obj, str_upper);
				1743	STATIC MP_DEFINE_CONST_FUN_OBJ_1(str_isspace_obj, str_isspace);
				1744	STATIC MP_DEFINE_CONST_FUN_OBJ_1(str_isalpha_obj, str_isalpha);
				1745	STATIC MP_DEFINE_CONST_FUN_OBJ_1(str_isdigit_obj, str_isdigit);
				1746	STATIC MP_DEFINE_CONST_FUN_OBJ_1(str_isupper_obj, str_isupper);
				1747	STATIC MP_DEFINE_CONST_FUN_OBJ_1(str_islower_obj, str_islower);
				1748
				// Method table: each entry pairs a method-name qstr with the function
				// object implementing it. The decode/encode entries are only compiled in
				// when MICROPY_CPYTHON_COMPAT is enabled.
				1749	STATIC const mp_map_elem_t str_locals_dict_table[] = {
				1750	#if MICROPY_CPYTHON_COMPAT
				1751	{ MP_OBJ_NEW_QSTR(MP_QSTR_decode), (mp_obj_t)&bytes_decode_obj },
				1752	{ MP_OBJ_NEW_QSTR(MP_QSTR_encode), (mp_obj_t)&str_encode_obj },
				1753	#endif
				1754	{ MP_OBJ_NEW_QSTR(MP_QSTR_find), (mp_obj_t)&str_find_obj },
				1755	{ MP_OBJ_NEW_QSTR(MP_QSTR_rfind), (mp_obj_t)&str_rfind_obj },
				1756	{ MP_OBJ_NEW_QSTR(MP_QSTR_index), (mp_obj_t)&str_index_obj },
				1757	{ MP_OBJ_NEW_QSTR(MP_QSTR_rindex), (mp_obj_t)&str_rindex_obj },
				1758	{ MP_OBJ_NEW_QSTR(MP_QSTR_join), (mp_obj_t)&str_join_obj },
				1759	{ MP_OBJ_NEW_QSTR(MP_QSTR_split), (mp_obj_t)&str_split_obj },
				1760	{ MP_OBJ_NEW_QSTR(MP_QSTR_rsplit), (mp_obj_t)&str_rsplit_obj },
				1761	{ MP_OBJ_NEW_QSTR(MP_QSTR_startswith), (mp_obj_t)&str_startswith_obj },
				1762	{ MP_OBJ_NEW_QSTR(MP_QSTR_endswith), (mp_obj_t)&str_endswith_obj },
				1763	{ MP_OBJ_NEW_QSTR(MP_QSTR_strip), (mp_obj_t)&str_strip_obj },
				1764	{ MP_OBJ_NEW_QSTR(MP_QSTR_lstrip), (mp_obj_t)&str_lstrip_obj },
				1765	{ MP_OBJ_NEW_QSTR(MP_QSTR_rstrip), (mp_obj_t)&str_rstrip_obj },
				1766	{ MP_OBJ_NEW_QSTR(MP_QSTR_format), (mp_obj_t)&str_format_obj },
				1767	{ MP_OBJ_NEW_QSTR(MP_QSTR_replace), (mp_obj_t)&str_replace_obj },
				1768	{ MP_OBJ_NEW_QSTR(MP_QSTR_count), (mp_obj_t)&str_count_obj },
				1769	{ MP_OBJ_NEW_QSTR(MP_QSTR_partition), (mp_obj_t)&str_partition_obj },
				1770	{ MP_OBJ_NEW_QSTR(MP_QSTR_rpartition), (mp_obj_t)&str_rpartition_obj },
				1771	{ MP_OBJ_NEW_QSTR(MP_QSTR_lower), (mp_obj_t)&str_lower_obj },
				1772	{ MP_OBJ_NEW_QSTR(MP_QSTR_upper), (mp_obj_t)&str_upper_obj },
				1773	{ MP_OBJ_NEW_QSTR(MP_QSTR_isspace), (mp_obj_t)&str_isspace_obj },
				1774	{ MP_OBJ_NEW_QSTR(MP_QSTR_isalpha), (mp_obj_t)&str_isalpha_obj },
				1775	{ MP_OBJ_NEW_QSTR(MP_QSTR_isdigit), (mp_obj_t)&str_isdigit_obj },
				1776	{ MP_OBJ_NEW_QSTR(MP_QSTR_isupper), (mp_obj_t)&str_isupper_obj },
				1777	{ MP_OBJ_NEW_QSTR(MP_QSTR_islower), (mp_obj_t)&str_islower_obj },
				1778	};
				1779
				// Dict object built from str_locals_dict_table; installed as .locals_dict
				// on both mp_type_str and mp_type_bytes.
				1780	STATIC MP_DEFINE_CONST_DICT(str_locals_dict, str_locals_dict_table);
				1781
				// Type object for str: plugs the str handlers, the str iterator
				// constructor and the method dict into the type's slots.
				1782	const mp_obj_type_t mp_type_str = {
				1783	{ &mp_type_type },
				1784	.name = MP_QSTR_str,
				1785	.print = str_print,
				1786	.make_new = str_make_new,
				1787	.binary_op = str_binary_op,
				1788	.subscr = str_subscr,
				1789	.getiter = mp_obj_new_str_iterator,
				1790	.buffer_p = { .get_buffer = str_get_buffer },
				1791	.locals_dict = (mp_obj_t)&str_locals_dict,
				1792	};
				1793
				1794	// Type object for bytes. Shares print, binary_op, subscr, get_buffer and the
				// method dict with str; only the name, make_new and getiter slots differ.
				1795	const mp_obj_type_t mp_type_bytes = {
				1796	{ &mp_type_type },
				1797	.name = MP_QSTR_bytes,
				1798	.print = str_print,
				1799	.make_new = bytes_make_new,
				1800	.binary_op = str_binary_op,
				1801	.subscr = str_subscr,
				1802	.getiter = mp_obj_new_bytes_iterator,
				1803	.buffer_p = { .get_buffer = str_get_buffer },
				1804	.locals_dict = (mp_obj_t)&str_locals_dict,
				1805	};
				1806
				1807	// the zero-length bytes: a statically allocated bytes object whose integer
				// fields are both 0 and whose data pointer is NULL, exported to the rest of
				// the runtime as mp_const_empty_bytes.
				1808	STATIC const mp_obj_str_t empty_bytes_obj = {{&mp_type_bytes}, 0, 0, NULL};
				1809	const mp_obj_t mp_const_empty_bytes = (mp_obj_t)&empty_bytes_obj;
				1810
				1811	mp_obj_t mp_obj_str_builder_start(const mp_obj_type_t type, uint len, byte *data) {
				1812	mp_obj_str_t *o = m_new_obj(mp_obj_str_t);
				1813	o->base.type = type;
				1814	o->len = len;
				1815	o->hash = 0;
				1816	byte *p = m_new(byte, len + 1);
				1817	o->data = p;
				1818	*data = p;
				1819	return o;
				1820	}
				1821
				1822	mp_obj_t mp_obj_str_builder_end(mp_obj_t o_in) {
				1823	mp_obj_str_t *o = o_in;
				1824	o->hash = qstr_compute_hash(o->data, o->len);
				1825	byte p = (byte)o->data;
				1826	p[o->len] = '\0'; // for now we add null for compatibility with C ASCIIZ strings
				1827	return o;
				1828	}
				1829
				1830	mp_obj_t mp_obj_new_str_of_type(const mp_obj_type_t type, const byte data, uint len) {
				1831	mp_obj_str_t *o = m_new_obj(mp_obj_str_t);
				1832	o->base.type = type;
				1833	o->len = len;
				1834	if (data) {
				1835	o->hash = qstr_compute_hash(data, len);
				1836	byte *p = m_new(byte, len + 1);
				1837	o->data = p;
				1838	memcpy(p, data, len * sizeof(byte));
				1839	p[len] = '\0'; // for now we add null for compatibility with C ASCIIZ strings
				1840	}
				1841	return o;
				1842	}
				1843
				1844	mp_obj_t mp_obj_new_str(const char* data, uint len, bool make_qstr_if_not_already) {
				1845	if (make_qstr_if_not_already) {
				1846	// use existing, or make a new qstr
				1847	return MP_OBJ_NEW_QSTR(qstr_from_strn(data, len));
				1848	} else {
				1849	qstr q = qstr_find_strn(data, len);
				1850	if (q != MP_QSTR_NULL) {
				1851	// qstr with this data already exists
				1852	return MP_OBJ_NEW_QSTR(q);
				1853	} else {
				1854	// no existing qstr, don't make one
				1855	return mp_obj_new_str_of_type(&mp_type_str, (const byte*)data, len);
				1856	}
				1857	}
				1858	}
				1859
				1860	mp_obj_t mp_obj_str_intern(mp_obj_t str) {
				1861	GET_STR_DATA_LEN(str, data, len);
				1862	return MP_OBJ_NEW_QSTR(qstr_from_strn((const char*)data, len));
				1863	}
				1864
				1865	mp_obj_t mp_obj_new_bytes(const byte* data, uint len) {
				1866	return mp_obj_new_str_of_type(&mp_type_bytes, data, len);
				1867	}
				1868
				1869	bool mp_obj_str_equal(mp_obj_t s1, mp_obj_t s2) {
				1870	if (MP_OBJ_IS_QSTR(s1) && MP_OBJ_IS_QSTR(s2)) {
				1871	return s1 == s2;
				1872	} else {
				1873	GET_STR_HASH(s1, h1);
				1874	GET_STR_HASH(s2, h2);
				1875	// If any of hashes is 0, it means it's not valid
				1876	if (h1 != 0 && h2 != 0 && h1 != h2) {
				1877	return false;
				1878	}
				1879	GET_STR_DATA_LEN(s1, d1, l1);
				1880	GET_STR_DATA_LEN(s2, d2, l2);
				1881	if (l1 != l2) {
				1882	return false;
				1883	}
				1884	return memcmp(d1, d2, l1) == 0;
				1885	}
				1886	}
				1887
				1888	STATIC void bad_implicit_conversion(mp_obj_t self_in) {
				1889	nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_TypeError, "Can't convert '%s' object to str implicitly", mp_obj_get_type_str(self_in)));
				1890	}
				1891
				1892	STATIC void arg_type_mixup() {
				1893	nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, "Can't mix str and bytes arguments"));
				1894	}
				1895
				1896	uint mp_obj_str_get_hash(mp_obj_t self_in) {
				1897	// TODO: This has too big overhead for hash accessor
				1898	if (MP_OBJ_IS_STR(self_in) \|\| MP_OBJ_IS_TYPE(self_in, &mp_type_bytes)) {
				1899	GET_STR_HASH(self_in, h);
				1900	return h;
				1901	} else {
				1902	bad_implicit_conversion(self_in);
				1903	}
				1904	}
				1905
				1906	uint mp_obj_str_get_len(mp_obj_t self_in) {
				1907	// TODO This has a double check for the type, one in obj.c and one here
				1908	if (MP_OBJ_IS_STR(self_in) \|\| MP_OBJ_IS_TYPE(self_in, &mp_type_bytes)) {
Chris Angelico	64b468d	2014-06-04 05:28:12 +1000	[diff] [blame^]	1909	GET_STR_DATA_LEN(self_in, self_data, self_len);
				1910	return unichar_charlen((const char *)self_data, self_len);
Paul Sokolovsky	8386534	2014-06-13 00:51:34 +0300	[diff] [blame]	1911	} else {
				1912	bad_implicit_conversion(self_in);
				1913	}
				1914	}
				1915
				1916	// use this if you will anyway convert the string to a qstr
				1917	// will be more efficient for the case where it's already a qstr
				1918	qstr mp_obj_str_get_qstr(mp_obj_t self_in) {
				1919	if (MP_OBJ_IS_QSTR(self_in)) {
				1920	return MP_OBJ_QSTR_VALUE(self_in);
				1921	} else if (MP_OBJ_IS_TYPE(self_in, &mp_type_str)) {
				1922	mp_obj_str_t *self = self_in;
				1923	return qstr_from_strn((char*)self->data, self->len);
				1924	} else {
				1925	bad_implicit_conversion(self_in);
				1926	}
				1927	}
				1928
				1929	// only use this function if you need the str data to be zero terminated
				1930	// at the moment all strings are zero terminated to help with C ASCIIZ compatibility
				1931	const char *mp_obj_str_get_str(mp_obj_t self_in) {
				1932	if (MP_OBJ_IS_STR(self_in)) {
				1933	GET_STR_DATA_LEN(self_in, s, l);
				1934	(void)l; // len unused
				1935	return (const char*)s;
				1936	} else {
				1937	bad_implicit_conversion(self_in);
				1938	}
				1939	}
				1940
				1941	const char mp_obj_str_get_data(mp_obj_t self_in, uint len) {
				1942	if (is_str_or_bytes(self_in)) {
				1943	GET_STR_DATA_LEN(self_in, s, l);
				1944	*len = l;
				1945	return (const char*)s;
				1946	} else {
				1947	bad_implicit_conversion(self_in);
				1948	}
				1949	}
				1950
				1951	/******************************************************************************/
				1952	/* str iterator */
				1953
				// Iterator state shared by the str and bytes iterators.
				//   base  object header; its type selects the str or bytes iterator
				//   str   the object being iterated over
				//   cur   offset into the object's data of the next item to yield
				1954	typedef struct _mp_obj_str_it_t {
				1955	mp_obj_base_t base;
				1956	mp_obj_t str;
				1957	machine_uint_t cur;
				1958	} mp_obj_str_it_t;
				1959
				1960	STATIC mp_obj_t str_it_iternext(mp_obj_t self_in) {
				1961	mp_obj_str_it_t *self = self_in;
				1962	GET_STR_DATA_LEN(self->str, str, len);
				1963	if (self->cur < len) {
				1964	mp_obj_t o_out = mp_obj_new_str((const char*)str + self->cur, 1, true);
				1965	self->cur += 1;
				1966	return o_out;
				1967	} else {
				1968	return MP_OBJ_STOP_ITERATION;
				1969	}
				1970	}
				1971
				// Type object of the iterator created by mp_obj_new_str_iterator.
				// getiter is mp_identity (presumably returning the iterator itself);
				// iternext produces the items.
				1972	STATIC const mp_obj_type_t mp_type_str_it = {
				1973	{ &mp_type_type },
				1974	.name = MP_QSTR_iterator,
				1975	.getiter = mp_identity,
				1976	.iternext = str_it_iternext,
				1977	};
				1978
				1979	STATIC mp_obj_t bytes_it_iternext(mp_obj_t self_in) {
				1980	mp_obj_str_it_t *self = self_in;
				1981	GET_STR_DATA_LEN(self->str, str, len);
				1982	if (self->cur < len) {
				1983	mp_obj_t o_out = MP_OBJ_NEW_SMALL_INT((mp_small_int_t)str[self->cur]);
				1984	self->cur += 1;
				1985	return o_out;
				1986	} else {
				1987	return MP_OBJ_STOP_ITERATION;
				1988	}
				1989	}
				1990
				// Type object of the iterator created by mp_obj_new_bytes_iterator.
				// getiter is mp_identity (presumably returning the iterator itself);
				// iternext produces the byte values.
				1991	STATIC const mp_obj_type_t mp_type_bytes_it = {
				1992	{ &mp_type_type },
				1993	.name = MP_QSTR_iterator,
				1994	.getiter = mp_identity,
				1995	.iternext = bytes_it_iternext,
				1996	};
				1997
				1998	mp_obj_t mp_obj_new_str_iterator(mp_obj_t str) {
				1999	mp_obj_str_it_t *o = m_new_obj(mp_obj_str_it_t);
				2000	o->base.type = &mp_type_str_it;
				2001	o->str = str;
				2002	o->cur = 0;
				2003	return o;
				2004	}
				2005
				2006	mp_obj_t mp_obj_new_bytes_iterator(mp_obj_t str) {
				2007	mp_obj_str_it_t *o = m_new_obj(mp_obj_str_it_t);
				2008	o->base.type = &mp_type_bytes_it;
				2009	o->str = str;
				2010	o->cur = 0;
				2011	return o;
				2012	}