Blame - py/objstrunicode.c - lite/micropython

blob: 6656090c84f05db403ba96d7065d478cbbfdeb14 [file] [log] [blame]

Paul Sokolovsky	8386534	2014-06-13 00:51:34 +0300	[diff] [blame^]	1	/*
				2	* This file is part of the Micro Python project, http://micropython.org/
				3	*
				4	* The MIT License (MIT)
				5	*
				6	* Copyright (c) 2013, 2014 Damien P. George
				7	* Copyright (c) 2014 Paul Sokolovsky
				8	*
				9	* Permission is hereby granted, free of charge, to any person obtaining a copy
				10	* of this software and associated documentation files (the "Software"), to deal
				11	* in the Software without restriction, including without limitation the rights
				12	* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
				13	* copies of the Software, and to permit persons to whom the Software is
				14	* furnished to do so, subject to the following conditions:
				15	*
				16	* The above copyright notice and this permission notice shall be included in
				17	* all copies or substantial portions of the Software.
				18	*
				19	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				20	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				21	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				22	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				23	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				24	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
				25	* THE SOFTWARE.
				26	*/
				27
				28	#include <stdbool.h>
				29	#include <string.h>
				30	#include <assert.h>
				31
				32	#include "mpconfig.h"
				33	#include "nlr.h"
				34	#include "misc.h"
				35	#include "qstr.h"
				36	#include "obj.h"
				37	#include "runtime0.h"
				38	#include "runtime.h"
				39	#include "pfenv.h"
				40	#include "objstr.h"
				41	#include "objlist.h"
				42
				// Forward declaration of the '%'-formatting helper that str_binary_op() calls
				// for MP_BINARY_OP_MODULO; its definition is not in the visible part of the file.
				43	STATIC mp_obj_t str_modulo_format(mp_obj_t pattern, uint n_args, const mp_obj_t *args, mp_obj_t dict);
				// Declared without an initializer so bytes_make_new() can return it for a
				// zero-argument call. NOTE(review): the initialized definition is presumably
				// further down the file -- confirm.
				44	const mp_obj_t mp_const_empty_bytes;
				45
				// Declares `str_hash` (uint) in the enclosing scope and initialises it with the
				// hash of str_obj_in: via qstr_hash() for a qstr, else from the object's hash field.
				#define GET_STR_HASH(str_obj_in, str_hash) uint str_hash = MP_OBJ_IS_QSTR(str_obj_in) ? qstr_hash(MP_OBJ_QSTR_VALUE(str_obj_in)) : ((mp_obj_str_t*)str_obj_in)->hash;
				48
				// Declares `str_len` (uint) in the enclosing scope and initialises it with the
				// length of str_obj_in: via qstr_len() for a qstr, else from the object's len field.
				#define GET_STR_LEN(str_obj_in, str_len) uint str_len = MP_OBJ_IS_QSTR(str_obj_in) ? qstr_len(MP_OBJ_QSTR_VALUE(str_obj_in)) : ((mp_obj_str_t*)str_obj_in)->len;
				51
				// Declares `str_data` (const byte *) and `str_len` (uint) in the enclosing scope
				// and fills both from str_obj_in. A qstr is resolved through qstr_data(), which
				// also writes the length; any other object is read as an mp_obj_str_t.
				// The data pointer is borrowed from the object, not copied.
				#define GET_STR_DATA_LEN(str_obj_in, str_data, str_len) const byte *str_data; uint str_len; if (MP_OBJ_IS_QSTR(str_obj_in)) { str_data = qstr_data(MP_OBJ_QSTR_VALUE(str_obj_in), &str_len); } else { str_len = ((mp_obj_str_t*)str_obj_in)->len; str_data = ((mp_obj_str_t*)str_obj_in)->data; }
				54
				55	STATIC mp_obj_t mp_obj_new_str_iterator(mp_obj_t str);
				56	STATIC mp_obj_t mp_obj_new_bytes_iterator(mp_obj_t str);
				57	STATIC NORETURN void bad_implicit_conversion(mp_obj_t self_in);
				58	STATIC NORETURN void arg_type_mixup();
				59
				60	STATIC bool is_str_or_bytes(mp_obj_t o) {
				61	return MP_OBJ_IS_STR(o) \|\| MP_OBJ_IS_TYPE(o, &mp_type_bytes);
				62	}
				63
				64	/******************************************************************************/
				65	/* str */
				66
				67	void mp_str_print_quoted(void (print)(void env, const char fmt, ...), void env, const byte *str_data, uint str_len) {
				68	// this escapes characters, but it will be very slow to print (calling print many times)
				69	bool has_single_quote = false;
				70	bool has_double_quote = false;
				71	for (const byte s = str_data, top = str_data + str_len; !has_double_quote && s < top; s++) {
				72	if (*s == '\'') {
				73	has_single_quote = true;
				74	} else if (*s == '"') {
				75	has_double_quote = true;
				76	}
				77	}
				78	int quote_char = '\'';
				79	if (has_single_quote && !has_double_quote) {
				80	quote_char = '"';
				81	}
				82	print(env, "%c", quote_char);
				83	for (const byte s = str_data, top = str_data + str_len; s < top; s++) {
				84	if (*s == quote_char) {
				85	print(env, "\\%c", quote_char);
				86	} else if (*s == '\\') {
				87	print(env, "\\\\");
				88	} else if (32 <= s && s <= 126) {
				89	print(env, "%c", *s);
				90	} else if (*s == '\n') {
				91	print(env, "\\n");
				92	} else if (*s == '\r') {
				93	print(env, "\\r");
				94	} else if (*s == '\t') {
				95	print(env, "\\t");
				96	} else {
				97	print(env, "\\x%02x", *s);
				98	}
				99	}
				100	print(env, "%c", quote_char);
				101	}
				102
				103	STATIC void str_print(void (print)(void env, const char fmt, ...), void env, mp_obj_t self_in, mp_print_kind_t kind) {
				104	GET_STR_DATA_LEN(self_in, str_data, str_len);
				105	bool is_bytes = MP_OBJ_IS_TYPE(self_in, &mp_type_bytes);
				106	if (kind == PRINT_STR && !is_bytes) {
				107	print(env, "%.*s", str_len, str_data);
				108	} else {
				109	if (is_bytes) {
				110	print(env, "b");
				111	}
				112	mp_str_print_quoted(print, env, str_data, str_len);
				113	}
				114	}
				115
				116	STATIC mp_obj_t str_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const mp_obj_t *args) {
				117	#if MICROPY_CPYTHON_COMPAT
				118	if (n_kw != 0) {
				119	mp_arg_error_unimpl_kw();
				120	}
				121	#endif
				122
				123	switch (n_args) {
				124	case 0:
				125	return MP_OBJ_NEW_QSTR(MP_QSTR_);
				126
				127	case 1:
				128	{
				129	vstr_t *vstr = vstr_new();
				130	mp_obj_print_helper((void ()(void, const char*, ...))vstr_printf, vstr, args[0], PRINT_STR);
				131	mp_obj_t s = mp_obj_new_str(vstr->buf, vstr->len, false);
				132	vstr_free(vstr);
				133	return s;
				134	}
				135
				136	case 2:
				137	case 3:
				138	{
				139	// TODO: validate 2nd/3rd args
				140	if (!MP_OBJ_IS_TYPE(args[0], &mp_type_bytes)) {
				141	nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, "bytes expected"));
				142	}
				143	GET_STR_DATA_LEN(args[0], str_data, str_len);
				144	GET_STR_HASH(args[0], str_hash);
				145	mp_obj_str_t *o = mp_obj_new_str_of_type(&mp_type_str, NULL, str_len);
				146	o->data = str_data;
				147	o->hash = str_hash;
				148	return o;
				149	}
				150
				151	default:
				152	nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, "str takes at most 3 arguments"));
				153	}
				154	}
				155
				156	STATIC mp_obj_t bytes_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const mp_obj_t *args) {
				157	if (n_args == 0) {
				158	return mp_const_empty_bytes;
				159	}
				160
				161	#if MICROPY_CPYTHON_COMPAT
				162	if (n_kw != 0) {
				163	mp_arg_error_unimpl_kw();
				164	}
				165	#endif
				166
				167	if (MP_OBJ_IS_STR(args[0])) {
				168	if (n_args < 2 \|\| n_args > 3) {
				169	goto wrong_args;
				170	}
				171	GET_STR_DATA_LEN(args[0], str_data, str_len);
				172	GET_STR_HASH(args[0], str_hash);
				173	mp_obj_str_t *o = mp_obj_new_str_of_type(&mp_type_bytes, NULL, str_len);
				174	o->data = str_data;
				175	o->hash = str_hash;
				176	return o;
				177	}
				178
				179	if (n_args > 1) {
				180	goto wrong_args;
				181	}
				182
				183	if (MP_OBJ_IS_SMALL_INT(args[0])) {
				184	uint len = MP_OBJ_SMALL_INT_VALUE(args[0]);
				185	byte *data;
				186
				187	mp_obj_t o = mp_obj_str_builder_start(&mp_type_bytes, len, &data);
				188	memset(data, 0, len);
				189	return mp_obj_str_builder_end(o);
				190	}
				191
				192	int len;
				193	byte *data;
				194	vstr_t *vstr = NULL;
				195	mp_obj_t o = NULL;
				196	// Try to create array of exact len if initializer len is known
				197	mp_obj_t len_in = mp_obj_len_maybe(args[0]);
				198	if (len_in == MP_OBJ_NULL) {
				199	len = -1;
				200	vstr = vstr_new();
				201	} else {
				202	len = MP_OBJ_SMALL_INT_VALUE(len_in);
				203	o = mp_obj_str_builder_start(&mp_type_bytes, len, &data);
				204	}
				205
				206	mp_obj_t iterable = mp_getiter(args[0]);
				207	mp_obj_t item;
				208	while ((item = mp_iternext(iterable)) != MP_OBJ_STOP_ITERATION) {
				209	if (len == -1) {
				210	vstr_add_char(vstr, MP_OBJ_SMALL_INT_VALUE(item));
				211	} else {
				212	*data++ = MP_OBJ_SMALL_INT_VALUE(item);
				213	}
				214	}
				215
				216	if (len == -1) {
				217	vstr_shrink(vstr);
				218	// TODO: Optimize, borrow buffer from vstr
				219	len = vstr_len(vstr);
				220	o = mp_obj_str_builder_start(&mp_type_bytes, len, &data);
				221	memcpy(data, vstr_str(vstr), len);
				222	vstr_free(vstr);
				223	}
				224
				225	return mp_obj_str_builder_end(o);
				226
				227	wrong_args:
				228	nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, "wrong number of arguments"));
				229	}
				230
				231	// like strstr but with specified length and allows \0 bytes
				232	// TODO replace with something more efficient/standard
				233	STATIC const byte find_subbytes(const byte haystack, machine_uint_t hlen, const byte *needle, machine_uint_t nlen, machine_int_t direction) {
				234	if (hlen >= nlen) {
				235	machine_uint_t str_index, str_index_end;
				236	if (direction > 0) {
				237	str_index = 0;
				238	str_index_end = hlen - nlen;
				239	} else {
				240	str_index = hlen - nlen;
				241	str_index_end = 0;
				242	}
				243	for (;;) {
				244	if (memcmp(&haystack[str_index], needle, nlen) == 0) {
				245	//found
				246	return haystack + str_index;
				247	}
				248	if (str_index == str_index_end) {
				249	//not found
				250	break;
				251	}
				252	str_index += direction;
				253	}
				254	}
				255	return NULL;
				256	}
				257
				258	STATIC mp_obj_t str_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) {
				259	GET_STR_DATA_LEN(lhs_in, lhs_data, lhs_len);
				260	mp_obj_type_t *lhs_type = mp_obj_get_type(lhs_in);
				261	mp_obj_type_t *rhs_type = mp_obj_get_type(rhs_in);
				262	switch (op) {
				263	case MP_BINARY_OP_ADD:
				264	case MP_BINARY_OP_INPLACE_ADD:
				265	if (lhs_type == rhs_type) {
				266	// add 2 strings or bytes
				267
				268	GET_STR_DATA_LEN(rhs_in, rhs_data, rhs_len);
				269	int alloc_len = lhs_len + rhs_len;
				270
				271	/* code for making qstr
				272	byte *q_ptr;
				273	byte *val = qstr_build_start(alloc_len, &q_ptr);
				274	memcpy(val, lhs_data, lhs_len);
				275	memcpy(val + lhs_len, rhs_data, rhs_len);
				276	return MP_OBJ_NEW_QSTR(qstr_build_end(q_ptr));
				277	*/
				278
				279	// code for non-qstr
				280	byte *data;
				281	mp_obj_t s = mp_obj_str_builder_start(lhs_type, alloc_len, &data);
				282	memcpy(data, lhs_data, lhs_len);
				283	memcpy(data + lhs_len, rhs_data, rhs_len);
				284	return mp_obj_str_builder_end(s);
				285	}
				286	break;
				287
				288	case MP_BINARY_OP_IN:
				289	/* NOTE `a in b` is `b.__contains__(a)` */
				290	if (lhs_type == rhs_type) {
				291	GET_STR_DATA_LEN(rhs_in, rhs_data, rhs_len);
				292	return MP_BOOL(find_subbytes(lhs_data, lhs_len, rhs_data, rhs_len, 1) != NULL);
				293	}
				294	break;
				295
				296	case MP_BINARY_OP_MULTIPLY: {
				297	if (!MP_OBJ_IS_SMALL_INT(rhs_in)) {
				298	return MP_OBJ_NULL; // op not supported
				299	}
				300	int n = MP_OBJ_SMALL_INT_VALUE(rhs_in);
				301	byte *data;
				302	mp_obj_t s = mp_obj_str_builder_start(lhs_type, lhs_len * n, &data);
				303	mp_seq_multiply(lhs_data, sizeof(*lhs_data), lhs_len, n, data);
				304	return mp_obj_str_builder_end(s);
				305	}
				306
				307	case MP_BINARY_OP_MODULO: {
				308	mp_obj_t *args;
				309	uint n_args;
				310	mp_obj_t dict = MP_OBJ_NULL;
				311	if (MP_OBJ_IS_TYPE(rhs_in, &mp_type_tuple)) {
				312	// TODO: Support tuple subclasses?
				313	mp_obj_tuple_get(rhs_in, &n_args, &args);
				314	} else if (MP_OBJ_IS_TYPE(rhs_in, &mp_type_dict)) {
				315	args = NULL;
				316	n_args = 0;
				317	dict = rhs_in;
				318	} else {
				319	args = &rhs_in;
				320	n_args = 1;
				321	}
				322	return str_modulo_format(lhs_in, n_args, args, dict);
				323	}
				324
				325	//case MP_BINARY_OP_NOT_EQUAL: // This is never passed here
				326	case MP_BINARY_OP_EQUAL: // This will be passed only for bytes, str is dealt with in mp_obj_equal()
				327	case MP_BINARY_OP_LESS:
				328	case MP_BINARY_OP_LESS_EQUAL:
				329	case MP_BINARY_OP_MORE:
				330	case MP_BINARY_OP_MORE_EQUAL:
				331	if (lhs_type == rhs_type) {
				332	GET_STR_DATA_LEN(rhs_in, rhs_data, rhs_len);
				333	return MP_BOOL(mp_seq_cmp_bytes(op, lhs_data, lhs_len, rhs_data, rhs_len));
				334	}
				335	if (lhs_type == &mp_type_bytes) {
				336	mp_buffer_info_t bufinfo;
				337	if (!mp_get_buffer(rhs_in, &bufinfo, MP_BUFFER_READ)) {
				338	goto uncomparable;
				339	}
				340	return MP_BOOL(mp_seq_cmp_bytes(op, lhs_data, lhs_len, bufinfo.buf, bufinfo.len));
				341	}
				342	uncomparable:
				343	if (op == MP_BINARY_OP_EQUAL) {
				344	return mp_const_false;
				345	}
				346	}
				347
				348	return MP_OBJ_NULL; // op not supported
				349	}
				350
				351	STATIC mp_obj_t str_subscr(mp_obj_t self_in, mp_obj_t index, mp_obj_t value) {
				352	mp_obj_type_t *type = mp_obj_get_type(self_in);
				353	GET_STR_DATA_LEN(self_in, self_data, self_len);
				354	if (value == MP_OBJ_SENTINEL) {
				355	// load
				356	#if MICROPY_PY_BUILTINS_SLICE
				357	if (MP_OBJ_IS_TYPE(index, &mp_type_slice)) {
				358	mp_bound_slice_t slice;
				359	if (!mp_seq_get_fast_slice_indexes(self_len, index, &slice)) {
				360	nlr_raise(mp_obj_new_exception_msg(&mp_type_NotImplementedError,
				361	"only slices with step=1 (aka None) are supported"));
				362	}
				363	return mp_obj_new_str_of_type(type, self_data + slice.start, slice.stop - slice.start);
				364	}
				365	#endif
				366	uint index_val = mp_get_index(type, self_len, index, false);
				367	if (type == &mp_type_bytes) {
				368	return MP_OBJ_NEW_SMALL_INT((mp_small_int_t)self_data[index_val]);
				369	} else {
				370	return mp_obj_new_str((char*)self_data + index_val, 1, true);
				371	}
				372	} else {
				373	return MP_OBJ_NULL; // op not supported
				374	}
				375	}
				376
				377	STATIC mp_obj_t str_join(mp_obj_t self_in, mp_obj_t arg) {
				378	assert(is_str_or_bytes(self_in));
				379	const mp_obj_type_t *self_type = mp_obj_get_type(self_in);
				380
				381	// get separation string
				382	GET_STR_DATA_LEN(self_in, sep_str, sep_len);
				383
				384	// process args
				385	uint seq_len;
				386	mp_obj_t *seq_items;
				387	if (MP_OBJ_IS_TYPE(arg, &mp_type_tuple)) {
				388	mp_obj_tuple_get(arg, &seq_len, &seq_items);
				389	} else {
				390	if (!MP_OBJ_IS_TYPE(arg, &mp_type_list)) {
				391	// arg is not a list, try to convert it to one
				392	// TODO: Try to optimize?
				393	arg = mp_type_list.make_new((mp_obj_t)&mp_type_list, 1, 0, &arg);
				394	}
				395	mp_obj_list_get(arg, &seq_len, &seq_items);
				396	}
				397
				398	// count required length
				399	int required_len = 0;
				400	for (int i = 0; i < seq_len; i++) {
				401	if (mp_obj_get_type(seq_items[i]) != self_type) {
				402	nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError,
				403	"join expects a list of str/bytes objects consistent with self object"));
				404	}
				405	if (i > 0) {
				406	required_len += sep_len;
				407	}
				408	GET_STR_LEN(seq_items[i], l);
				409	required_len += l;
				410	}
				411
				412	// make joined string
				413	byte *data;
				414	mp_obj_t joined_str = mp_obj_str_builder_start(self_type, required_len, &data);
				415	for (int i = 0; i < seq_len; i++) {
				416	if (i > 0) {
				417	memcpy(data, sep_str, sep_len);
				418	data += sep_len;
				419	}
				420	GET_STR_DATA_LEN(seq_items[i], s, l);
				421	memcpy(data, s, l);
				422	data += l;
				423	}
				424
				425	// return joined string
				426	return mp_obj_str_builder_end(joined_str);
				427	}
				428
				// True for the two characters str_split() treats as whitespace: space and tab.
				// The argument is evaluated twice, so it must not have side effects.
				#define is_ws(c) ((c) == ' ' || (c) == '\t')
				430
				431	STATIC mp_obj_t str_split(uint n_args, const mp_obj_t *args) {
				432	const mp_obj_type_t *self_type = mp_obj_get_type(args[0]);
				433	machine_int_t splits = -1;
				434	mp_obj_t sep = mp_const_none;
				435	if (n_args > 1) {
				436	sep = args[1];
				437	if (n_args > 2) {
				438	splits = mp_obj_get_int(args[2]);
				439	}
				440	}
				441
				442	mp_obj_t res = mp_obj_new_list(0, NULL);
				443	GET_STR_DATA_LEN(args[0], s, len);
				444	const byte *top = s + len;
				445
				446	if (sep == mp_const_none) {
				447	// sep not given, so separate on whitespace
				448
				449	// Initial whitespace is not counted as split, so we pre-do it
				450	while (s < top && is_ws(*s)) s++;
				451	while (s < top && splits != 0) {
				452	const byte *start = s;
				453	while (s < top && !is_ws(*s)) s++;
				454	mp_obj_list_append(res, mp_obj_new_str_of_type(self_type, start, s - start));
				455	if (s >= top) {
				456	break;
				457	}
				458	while (s < top && is_ws(*s)) s++;
				459	if (splits > 0) {
				460	splits--;
				461	}
				462	}
				463
				464	if (s < top) {
				465	mp_obj_list_append(res, mp_obj_new_str_of_type(self_type, s, top - s));
				466	}
				467
				468	} else {
				469	// sep given
				470
				471	uint sep_len;
				472	const char *sep_str = mp_obj_str_get_data(sep, &sep_len);
				473
				474	if (sep_len == 0) {
				475	nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "empty separator"));
				476	}
				477
				478	for (;;) {
				479	const byte *start = s;
				480	for (;;) {
				481	if (splits == 0 \|\| s + sep_len > top) {
				482	s = top;
				483	break;
				484	} else if (memcmp(s, sep_str, sep_len) == 0) {
				485	break;
				486	}
				487	s++;
				488	}
				489	mp_obj_list_append(res, mp_obj_new_str_of_type(self_type, start, s - start));
				490	if (s >= top) {
				491	break;
				492	}
				493	s += sep_len;
				494	if (splits > 0) {
				495	splits--;
				496	}
				497	}
				498	}
				499
				500	return res;
				501	}
				502
				503	STATIC mp_obj_t str_rsplit(uint n_args, const mp_obj_t *args) {
				504	if (n_args < 3) {
				505	// If we don't have split limit, it doesn't matter from which side
				506	// we split.
				507	return str_split(n_args, args);
				508	}
				509	const mp_obj_type_t *self_type = mp_obj_get_type(args[0]);
				510	mp_obj_t sep = args[1];
				511	GET_STR_DATA_LEN(args[0], s, len);
				512
				513	machine_int_t splits = mp_obj_get_int(args[2]);
				514	machine_int_t org_splits = splits;
				515	// Preallocate list to the max expected # of elements, as we
				516	// will fill it from the end.
				517	mp_obj_list_t *res = mp_obj_new_list(splits + 1, NULL);
				518	int idx = splits;
				519
				520	if (sep == mp_const_none) {
				521	assert(!"TODO: rsplit(None,n) not implemented");
				522	} else {
				523	uint sep_len;
				524	const char *sep_str = mp_obj_str_get_data(sep, &sep_len);
				525
				526	if (sep_len == 0) {
				527	nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "empty separator"));
				528	}
				529
				530	const byte *beg = s;
				531	const byte *last = s + len;
				532	for (;;) {
				533	s = last - sep_len;
				534	for (;;) {
				535	if (splits == 0 \|\| s < beg) {
				536	break;
				537	} else if (memcmp(s, sep_str, sep_len) == 0) {
				538	break;
				539	}
				540	s--;
				541	}
				542	if (s < beg \|\| splits == 0) {
				543	res->items[idx] = mp_obj_new_str_of_type(self_type, beg, last - beg);
				544	break;
				545	}
				546	res->items[idx--] = mp_obj_new_str_of_type(self_type, s + sep_len, last - s - sep_len);
				547	last = s;
				548	if (splits > 0) {
				549	splits--;
				550	}
				551	}
				552	if (idx != 0) {
				553	// We split less parts than split limit, now go cleanup surplus
				554	int used = org_splits + 1 - idx;
				555	memcpy(res->items, &res->items[idx], used * sizeof(mp_obj_t));
				556	mp_seq_clear(res->items, used, res->alloc, sizeof(*res->items));
				557	res->len = used;
				558	}
				559	}
				560
				561	return res;
				562	}
				563
				564
				565	STATIC mp_obj_t str_finder(uint n_args, const mp_obj_t *args, machine_int_t direction, bool is_index) {
				566	assert(2 <= n_args && n_args <= 4);
				567	assert(MP_OBJ_IS_STR(args[0]));
				568	assert(MP_OBJ_IS_STR(args[1]));
				569
				570	GET_STR_DATA_LEN(args[0], haystack, haystack_len);
				571	GET_STR_DATA_LEN(args[1], needle, needle_len);
				572
				573	machine_uint_t start = 0;
				574	machine_uint_t end = haystack_len;
				575	if (n_args >= 3 && args[2] != mp_const_none) {
				576	start = mp_get_index(&mp_type_str, haystack_len, args[2], true);
				577	}
				578	if (n_args >= 4 && args[3] != mp_const_none) {
				579	end = mp_get_index(&mp_type_str, haystack_len, args[3], true);
				580	}
				581
				582	const byte *p = find_subbytes(haystack + start, end - start, needle, needle_len, direction);
				583	if (p == NULL) {
				584	// not found
				585	if (is_index) {
				586	nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "substring not found"));
				587	} else {
				588	return MP_OBJ_NEW_SMALL_INT(-1);
				589	}
				590	} else {
				591	// found
				592	return MP_OBJ_NEW_SMALL_INT(p - haystack);
				593	}
				594	}
				595
				596	STATIC mp_obj_t str_find(uint n_args, const mp_obj_t *args) {
				597	return str_finder(n_args, args, 1, false);
				598	}
				599
				600	STATIC mp_obj_t str_rfind(uint n_args, const mp_obj_t *args) {
				601	return str_finder(n_args, args, -1, false);
				602	}
				603
				604	STATIC mp_obj_t str_index(uint n_args, const mp_obj_t *args) {
				605	return str_finder(n_args, args, 1, true);
				606	}
				607
				608	STATIC mp_obj_t str_rindex(uint n_args, const mp_obj_t *args) {
				609	return str_finder(n_args, args, -1, true);
				610	}
				611
				612	// TODO: (Much) more variety in args
				613	STATIC mp_obj_t str_startswith(uint n_args, const mp_obj_t *args) {
				614	GET_STR_DATA_LEN(args[0], str, str_len);
				615	GET_STR_DATA_LEN(args[1], prefix, prefix_len);
				616	uint index_val = 0;
				617	if (n_args > 2) {
				618	index_val = mp_get_index(&mp_type_str, str_len, args[2], true);
				619	}
				620	if (prefix_len + index_val > str_len) {
				621	return mp_const_false;
				622	}
				623	return MP_BOOL(memcmp(str + index_val, prefix, prefix_len) == 0);
				624	}
				625
				626	STATIC mp_obj_t str_endswith(uint n_args, const mp_obj_t *args) {
				627	GET_STR_DATA_LEN(args[0], str, str_len);
				628	GET_STR_DATA_LEN(args[1], suffix, suffix_len);
				629	assert(n_args == 2);
				630
				631	if (suffix_len > str_len) {
				632	return mp_const_false;
				633	}
				634	return MP_BOOL(memcmp(str + (str_len - suffix_len), suffix, suffix_len) == 0);
				635	}
				636
				637	enum { LSTRIP, RSTRIP, STRIP };
				638
				639	STATIC mp_obj_t str_uni_strip(int type, uint n_args, const mp_obj_t *args) {
				640	assert(1 <= n_args && n_args <= 2);
				641	assert(is_str_or_bytes(args[0]));
				642	const mp_obj_type_t *self_type = mp_obj_get_type(args[0]);
				643
				644	const byte *chars_to_del;
				645	uint chars_to_del_len;
				646	static const byte whitespace[] = " \t\n\r\v\f";
				647
				648	if (n_args == 1) {
				649	chars_to_del = whitespace;
				650	chars_to_del_len = sizeof(whitespace);
				651	} else {
				652	if (mp_obj_get_type(args[1]) != self_type) {
				653	arg_type_mixup();
				654	}
				655	GET_STR_DATA_LEN(args[1], s, l);
				656	chars_to_del = s;
				657	chars_to_del_len = l;
				658	}
				659
				660	GET_STR_DATA_LEN(args[0], orig_str, orig_str_len);
				661
				662	machine_uint_t first_good_char_pos = 0;
				663	bool first_good_char_pos_set = false;
				664	machine_uint_t last_good_char_pos = 0;
				665	machine_uint_t i = 0;
				666	machine_int_t delta = 1;
				667	if (type == RSTRIP) {
				668	i = orig_str_len - 1;
				669	delta = -1;
				670	}
				671	for (machine_uint_t len = orig_str_len; len > 0; len--) {
				672	if (find_subbytes(chars_to_del, chars_to_del_len, &orig_str[i], 1, 1) == NULL) {
				673	if (!first_good_char_pos_set) {
				674	first_good_char_pos_set = true;
				675	first_good_char_pos = i;
				676	if (type == LSTRIP) {
				677	last_good_char_pos = orig_str_len - 1;
				678	break;
				679	} else if (type == RSTRIP) {
				680	first_good_char_pos = 0;
				681	last_good_char_pos = i;
				682	break;
				683	}
				684	}
				685	last_good_char_pos = i;
				686	}
				687	i += delta;
				688	}
				689
				690	if (!first_good_char_pos_set) {
				691	// string is all whitespace, return ''
				692	return MP_OBJ_NEW_QSTR(MP_QSTR_);
				693	}
				694
				695	assert(last_good_char_pos >= first_good_char_pos);
				696	//+1 to accomodate the last character
				697	machine_uint_t stripped_len = last_good_char_pos - first_good_char_pos + 1;
				698	if (stripped_len == orig_str_len) {
				699	// If nothing was stripped, don't bother to dup original string
				700	// TODO: watch out for this case when we'll get to bytearray.strip()
				701	assert(first_good_char_pos == 0);
				702	return args[0];
				703	}
				704	return mp_obj_new_str_of_type(self_type, orig_str + first_good_char_pos, stripped_len);
				705	}
				706
				707	STATIC mp_obj_t str_strip(uint n_args, const mp_obj_t *args) {
				708	return str_uni_strip(STRIP, n_args, args);
				709	}
				710
				711	STATIC mp_obj_t str_lstrip(uint n_args, const mp_obj_t *args) {
				712	return str_uni_strip(LSTRIP, n_args, args);
				713	}
				714
				715	STATIC mp_obj_t str_rstrip(uint n_args, const mp_obj_t *args) {
				716	return str_uni_strip(RSTRIP, n_args, args);
				717	}
				718
				// Takes an int arg, but only parses unsigned numbers, and only changes
				// *num if at least one digit was parsed.
				//
				// Parses the run of decimal digits at the start of the NUL-terminated string
				// `str` into *num and returns the number of characters consumed (0 if `str`
				// does not start with a digit).
				static int str_to_int(const char *str, int *num) {
				    const char *s = str;
				    if (unichar_isdigit(*s)) {
				        *num = 0;
				        do {
				            *num = *num * 10 + (*s - '0');
				            s++;
				        }
				        while (unichar_isdigit(*s));
				    }
				    return s - str;
				}
				733
				// True when ch is one of the format-spec alignment characters: < > = ^
				static bool isalignment(char ch) {
				    if (ch == '\0') {
				        // strchr() would match the terminator of the set, so rule NUL out first
				        return false;
				    }
				    return strchr("<>=^", ch) != NULL;
				}
				737
				// True when ch is one of the format-spec presentation types:
				// b c d e E f F g G n o s x X %
				static bool istype(char ch) {
				    if (ch == '\0') {
				        // strchr() would match the terminator of the set, so rule NUL out first
				        return false;
				    }
				    return strchr("bcdeEfFgGnosxX%", ch) != NULL;
				}
				741
				742	static bool arg_looks_integer(mp_obj_t arg) {
				743	return MP_OBJ_IS_TYPE(arg, &mp_type_bool) \|\| MP_OBJ_IS_INT(arg);
				744	}
				745
				746	static bool arg_looks_numeric(mp_obj_t arg) {
				747	return arg_looks_integer(arg)
				748	#if MICROPY_PY_BUILTINS_FLOAT
				749	\|\| MP_OBJ_IS_TYPE(arg, &mp_type_float)
				750	#endif
				751	;
				752	}
				753
				754	static mp_obj_t arg_as_int(mp_obj_t arg) {
				755	#if MICROPY_PY_BUILTINS_FLOAT
				756	if (MP_OBJ_IS_TYPE(arg, &mp_type_float)) {
				757
				758	// TODO: Needs a way to construct an mpz integer from a float
				759
				760	mp_small_int_t num = mp_obj_get_float(arg);
				761	return MP_OBJ_NEW_SMALL_INT(num);
				762	}
				763	#endif
				764	return arg;
				765	}
				766
				767	mp_obj_t mp_obj_str_format(uint n_args, const mp_obj_t *args) {
				768	assert(MP_OBJ_IS_STR(args[0]));
				769
				770	GET_STR_DATA_LEN(args[0], str, len);
				771	int arg_i = 0;
				772	vstr_t *vstr = vstr_new();
				773	pfenv_t pfenv_vstr;
				774	pfenv_vstr.data = vstr;
				775	pfenv_vstr.print_strn = pfenv_vstr_add_strn;
				776
				777	for (const byte *top = str + len; str < top; str++) {
				778	if (*str == '}') {
				779	str++;
				780	if (str < top && *str == '}') {
				781	vstr_add_char(vstr, '}');
				782	continue;
				783	}
				784	nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "single '}' encountered in format string"));
				785	}
				786	if (*str != '{') {
				787	vstr_add_char(vstr, *str);
				788	continue;
				789	}
				790
				791	str++;
				792	if (str < top && *str == '{') {
				793	vstr_add_char(vstr, '{');
				794	continue;
				795	}
				796
				797	// replacement_field ::= "{" [field_name] ["!" conversion] [":" format_spec] "}"
				798
				799	vstr_t *field_name = NULL;
				800	char conversion = '\0';
				801	vstr_t *format_spec = NULL;
				802
				803	if (str < top && str != '}' && str != '!' && *str != ':') {
				804	field_name = vstr_new();
				805	while (str < top && str != '}' && str != '!' && *str != ':') {
				806	vstr_add_char(field_name, *str++);
				807	}
				808	vstr_add_char(field_name, '\0');
				809	}
				810
				811	// conversion ::= "r" \| "s"
				812
				813	if (str < top && *str == '!') {
				814	str++;
				815	if (str < top && (str == 'r' \|\| str == 's')) {
				816	conversion = *str++;
				817	} else {
				818	nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "end of format while looking for conversion specifier"));
				819	}
				820	}
				821
				822	if (str < top && *str == ':') {
				823	str++;
				824	// {:} is the same as {}, which is the same as {!s}
				825	// This makes a difference when passing in a True or False
				826	// '{}'.format(True) returns 'True'
				827	// '{:d}'.format(True) returns '1'
				828	// So we treat {:} as {} and this later gets treated to be {!s}
				829	if (*str != '}') {
				830	format_spec = vstr_new();
				831	while (str < top && *str != '}') {
				832	vstr_add_char(format_spec, *str++);
				833	}
				834	vstr_add_char(format_spec, '\0');
				835	}
				836	}
				837	if (str >= top) {
				838	nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "unmatched '{' in format"));
				839	}
				840	if (*str != '}') {
				841	nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "expected ':' after format specifier"));
				842	}
				843
				844	mp_obj_t arg = mp_const_none;
				845
				846	if (field_name) {
				847	if (arg_i > 0) {
				848	nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "can't switch from automatic field numbering to manual field specification"));
				849	}
				850	int index = 0;
				851	if (str_to_int(vstr_str(field_name), &index) != vstr_len(field_name) - 1) {
				852	nlr_raise(mp_obj_new_exception_msg(&mp_type_KeyError, "attributes not supported yet"));
				853	}
				854	if (index >= n_args - 1) {
				855	nlr_raise(mp_obj_new_exception_msg(&mp_type_IndexError, "tuple index out of range"));
				856	}
				857	arg = args[index + 1];
				858	arg_i = -1;
				859	vstr_free(field_name);
				860	field_name = NULL;
				861	} else {
				862	if (arg_i < 0) {
				863	nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "can't switch from manual field specification to automatic field numbering"));
				864	}
				865	if (arg_i >= n_args - 1) {
				866	nlr_raise(mp_obj_new_exception_msg(&mp_type_IndexError, "tuple index out of range"));
				867	}
				868	arg = args[arg_i + 1];
				869	arg_i++;
				870	}
				871	if (!format_spec && !conversion) {
				872	conversion = 's';
				873	}
				874	if (conversion) {
				875	mp_print_kind_t print_kind;
				876	if (conversion == 's') {
				877	print_kind = PRINT_STR;
				878	} else if (conversion == 'r') {
				879	print_kind = PRINT_REPR;
				880	} else {
				881	nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_ValueError, "unknown conversion specifier %c", conversion));
				882	}
				883	vstr_t *arg_vstr = vstr_new();
				884	mp_obj_print_helper((void ()(void, const char*, ...))vstr_printf, arg_vstr, arg, print_kind);
				885	arg = mp_obj_new_str(vstr_str(arg_vstr), vstr_len(arg_vstr), false);
				886	vstr_free(arg_vstr);
				887	}
				888
				889	char sign = '\0';
				890	char fill = '\0';
				891	char align = '\0';
				892	int width = -1;
				893	int precision = -1;
				894	char type = '\0';
				895	int flags = 0;
				896
				897	if (format_spec) {
				898	// The format specifier (from http://docs.python.org/2/library/string.html#formatspec)
				899	//
				900	// [[fill]align][sign][#][0][width][,][.precision][type]
				901	// fill ::= <any character>
				902	// align ::= "<" \| ">" \| "=" \| "^"
				903	// sign ::= "+" \| "-" \| " "
				904	// width ::= integer
				905	// precision ::= integer
				906	// type ::= "b" \| "c" \| "d" \| "e" \| "E" \| "f" \| "F" \| "g" \| "G" \| "n" \| "o" \| "s" \| "x" \| "X" \| "%"
				907
				908	const char *s = vstr_str(format_spec);
				909	if (isalignment(*s)) {
				910	align = *s++;
				911	} else if (*s && isalignment(s[1])) {
				912	fill = *s++;
				913	align = *s++;
				914	}
				915	if (s == '+' \|\| s == '-' \|\| *s == ' ') {
				916	if (*s == '+') {
				917	flags \|= PF_FLAG_SHOW_SIGN;
				918	} else if (*s == ' ') {
				919	flags \|= PF_FLAG_SPACE_SIGN;
				920	}
				921	sign = *s++;
				922	}
				923	if (*s == '#') {
				924	flags \|= PF_FLAG_SHOW_PREFIX;
				925	s++;
				926	}
				927	if (*s == '0') {
				928	if (!align) {
				929	align = '=';
				930	}
				931	if (!fill) {
				932	fill = '0';
				933	}
				934	}
				935	s += str_to_int(s, &width);
				936	if (*s == ',') {
				937	flags \|= PF_FLAG_SHOW_COMMA;
				938	s++;
				939	}
				940	if (*s == '.') {
				941	s++;
				942	s += str_to_int(s, &precision);
				943	}
				944	if (istype(*s)) {
				945	type = *s++;
				946	}
				947	if (*s) {
				948	nlr_raise(mp_obj_new_exception_msg(&mp_type_KeyError, "Invalid conversion specification"));
				949	}
				950	vstr_free(format_spec);
				951	format_spec = NULL;
				952	}
				953	if (!align) {
				954	if (arg_looks_numeric(arg)) {
				955	align = '>';
				956	} else {
				957	align = '<';
				958	}
				959	}
				960	if (!fill) {
				961	fill = ' ';
				962	}
				963
				964	if (sign) {
				965	if (type == 's') {
				966	nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "Sign not allowed in string format specifier"));
				967	}
				968	if (type == 'c') {
				969	nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "Sign not allowed with integer format specifier 'c'"));
				970	}
				971	} else {
				972	sign = '-';
				973	}
				974
				975	switch (align) {
				976	case '<': flags \|= PF_FLAG_LEFT_ADJUST; break;
				977	case '=': flags \|= PF_FLAG_PAD_AFTER_SIGN; break;
				978	case '^': flags \|= PF_FLAG_CENTER_ADJUST; break;
				979	}
				980
				981	if (arg_looks_integer(arg)) {
				982	switch (type) {
				983	case 'b':
				984	pfenv_print_mp_int(&pfenv_vstr, arg, 1, 2, 'a', flags, fill, width, 0);
				985	continue;
				986
				987	case 'c':
				988	{
				989	char ch = mp_obj_get_int(arg);
				990	pfenv_print_strn(&pfenv_vstr, &ch, 1, flags, fill, width);
				991	continue;
				992	}
				993
				994	case '\0': // No explicit format type implies 'd'
				995	case 'n': // I don't think we support locales in uPy so use 'd'
				996	case 'd':
				997	pfenv_print_mp_int(&pfenv_vstr, arg, 1, 10, 'a', flags, fill, width, 0);
				998	continue;
				999
				1000	case 'o':
				1001	if (flags & PF_FLAG_SHOW_PREFIX) {
				1002	flags \|= PF_FLAG_SHOW_OCTAL_LETTER;
				1003	}
				1004
				1005	pfenv_print_mp_int(&pfenv_vstr, arg, 1, 8, 'a', flags, fill, width, 0);
				1006	continue;
				1007
				1008	case 'X':
				1009	case 'x':
				1010	pfenv_print_mp_int(&pfenv_vstr, arg, 1, 16, type - ('X' - 'A'), flags, fill, width, 0);
				1011	continue;
				1012
				1013	case 'e':
				1014	case 'E':
				1015	case 'f':
				1016	case 'F':
				1017	case 'g':
				1018	case 'G':
				1019	case '%':
				1020	// The floating point formatters all work with anything that
				1021	// looks like an integer
				1022	break;
				1023
				1024	default:
				1025	nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_ValueError,
				1026	"unknown format code '%c' for object of type '%s'", type, mp_obj_get_type_str(arg)));
				1027	}
				1028	}
				1029
				1030	// NOTE: no else here. We need the e, f, g etc formats for integer
				1031	// arguments (from above if) to take this if.
				1032	if (arg_looks_numeric(arg)) {
				1033	if (!type) {
				1034
				1035	// Even though the docs say that an unspecified type is the same
				1036	// as 'g', there is one subtle difference, when the exponent
				1037	// is one less than the precision.
				1038	//
				1039	// '{:10.1}'.format(0.0) ==> '0e+00'
				1040	// '{:10.1g}'.format(0.0) ==> '0'
				1041	//
				1042	// TODO: Figure out how to deal with this.
				1043	//
				1044	// A proper solution would involve adding a special flag
				1045	// or something to format_float, and create a format_double
				1046	// to deal with doubles. In order to fix this when using
				1047	// sprintf, we'd need to use the e format and tweak the
				1048	// returned result to strip trailing zeros like the g format
				1049	// does.
				1050	//
				1051	// {:10.3} and {:10.2e} with 1.23e2 both produce 1.23e+02
				1052	// but with 1.e2 you get 1e+02 and 1.00e+02
				1053	//
				1054	// Stripping the trailing 0's (like g) does would make the
				1055	// e format give us the right format.
				1056	//
				1057	// CPython sources say:
				1058	// Omitted type specifier. Behaves in the same way as repr(x)
				1059	// and str(x) if no precision is given, else like 'g', but with
				1060	// at least one digit after the decimal point. */
				1061
				1062	type = 'g';
				1063	}
				1064	if (type == 'n') {
				1065	type = 'g';
				1066	}
				1067
				1068	flags \|= PF_FLAG_PAD_NAN_INF; // '{:06e}'.format(float('-inf')) should give '-00inf'
				1069	switch (type) {
				1070	#if MICROPY_PY_BUILTINS_FLOAT
				1071	case 'e':
				1072	case 'E':
				1073	case 'f':
				1074	case 'F':
				1075	case 'g':
				1076	case 'G':
				1077	pfenv_print_float(&pfenv_vstr, mp_obj_get_float(arg), type, flags, fill, width, precision);
				1078	break;
				1079
				1080	case '%':
				1081	flags \|= PF_FLAG_ADD_PERCENT;
				1082	pfenv_print_float(&pfenv_vstr, mp_obj_get_float(arg) * 100.0F, 'f', flags, fill, width, precision);
				1083	break;
				1084	#endif
				1085
				1086	default:
				1087	nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_ValueError,
				1088	"unknown format code '%c' for object of type 'float'",
				1089	type, mp_obj_get_type_str(arg)));
				1090	}
				1091	} else {
				1092	// arg doesn't look like a number
				1093
				1094	if (align == '=') {
				1095	nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "'=' alignment not allowed in string format specifier"));
				1096	}
				1097
				1098	switch (type) {
				1099	case '\0':
				1100	mp_obj_print_helper((void ()(void, const char*, ...))vstr_printf, vstr, arg, PRINT_STR);
				1101	break;
				1102
				1103	case 's':
				1104	{
				1105	uint len;
				1106	const char *s = mp_obj_str_get_data(arg, &len);
				1107	if (precision < 0) {
				1108	precision = len;
				1109	}
				1110	if (len > precision) {
				1111	len = precision;
				1112	}
				1113	pfenv_print_strn(&pfenv_vstr, s, len, flags, fill, width);
				1114	break;
				1115	}
				1116
				1117	default:
				1118	nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_ValueError,
				1119	"unknown format code '%c' for object of type 'str'",
				1120	type, mp_obj_get_type_str(arg)));
				1121	}
				1122	}
				1123	}
				1124
				1125	mp_obj_t s = mp_obj_new_str(vstr->buf, vstr->len, false);
				1126	vstr_free(vstr);
				1127	return s;
				1128	}
				1129
				1130	STATIC mp_obj_t str_modulo_format(mp_obj_t pattern, uint n_args, const mp_obj_t *args, mp_obj_t dict) {
				1131	assert(MP_OBJ_IS_STR(pattern));
				1132
				1133	GET_STR_DATA_LEN(pattern, str, len);
				1134	const byte *start_str = str;
				1135	int arg_i = 0;
				1136	vstr_t *vstr = vstr_new();
				1137	pfenv_t pfenv_vstr;
				1138	pfenv_vstr.data = vstr;
				1139	pfenv_vstr.print_strn = pfenv_vstr_add_strn;
				1140
				1141	for (const byte *top = str + len; str < top; str++) {
				1142	mp_obj_t arg = MP_OBJ_NULL;
				1143	if (*str != '%') {
				1144	vstr_add_char(vstr, *str);
				1145	continue;
				1146	}
				1147	if (++str >= top) {
				1148	break;
				1149	}
				1150	if (*str == '%') {
				1151	vstr_add_char(vstr, '%');
				1152	continue;
				1153	}
				1154
				1155	// Dictionary value lookup
				1156	if (*str == '(') {
				1157	const byte *key = ++str;
				1158	while (*str != ')') {
				1159	if (str >= top) {
				1160	nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "incomplete format key"));
				1161	}
				1162	++str;
				1163	}
				1164	mp_obj_t k_obj = mp_obj_new_str((const char*)key, str - key, true);
				1165	arg = mp_obj_dict_get(dict, k_obj);
				1166	str++;
				1167	}
				1168
				1169	int flags = 0;
				1170	char fill = ' ';
				1171	int alt = 0;
				1172	while (str < top) {
				1173	if (*str == '-') flags \|= PF_FLAG_LEFT_ADJUST;
				1174	else if (*str == '+') flags \|= PF_FLAG_SHOW_SIGN;
				1175	else if (*str == ' ') flags \|= PF_FLAG_SPACE_SIGN;
				1176	else if (*str == '#') alt = PF_FLAG_SHOW_PREFIX;
				1177	else if (*str == '0') {
				1178	flags \|= PF_FLAG_PAD_AFTER_SIGN;
				1179	fill = '0';
				1180	} else break;
				1181	str++;
				1182	}
				1183	// parse width, if it exists
				1184	int width = 0;
				1185	if (str < top) {
				1186	if (str == '') {
				1187	if (arg_i >= n_args) {
				1188	goto not_enough_args;
				1189	}
				1190	width = mp_obj_get_int(args[arg_i++]);
				1191	str++;
				1192	} else {
				1193	for (; str < top && '0' <= str && str <= '9'; str++) {
				1194	width = width * 10 + *str - '0';
				1195	}
				1196	}
				1197	}
				1198	int prec = -1;
				1199	if (str < top && *str == '.') {
				1200	if (++str < top) {
				1201	if (str == '') {
				1202	if (arg_i >= n_args) {
				1203	goto not_enough_args;
				1204	}
				1205	prec = mp_obj_get_int(args[arg_i++]);
				1206	str++;
				1207	} else {
				1208	prec = 0;
				1209	for (; str < top && '0' <= str && str <= '9'; str++) {
				1210	prec = prec * 10 + *str - '0';
				1211	}
				1212	}
				1213	}
				1214	}
				1215
				1216	if (str >= top) {
				1217	nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "incomplete format"));
				1218	}
				1219
				1220	// Tuple value lookup
				1221	if (arg == MP_OBJ_NULL) {
				1222	if (arg_i >= n_args) {
				1223	not_enough_args:
				1224	nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, "not enough arguments for format string"));
				1225	}
				1226	arg = args[arg_i++];
				1227	}
				1228	switch (*str) {
				1229	case 'c':
				1230	if (MP_OBJ_IS_STR(arg)) {
				1231	uint len;
				1232	const char *s = mp_obj_str_get_data(arg, &len);
				1233	if (len != 1) {
				1234	nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, "%%c requires int or char"));
				1235	break;
				1236	}
				1237	pfenv_print_strn(&pfenv_vstr, s, 1, flags, ' ', width);
				1238	break;
				1239	}
				1240	if (arg_looks_integer(arg)) {
				1241	char ch = mp_obj_get_int(arg);
				1242	pfenv_print_strn(&pfenv_vstr, &ch, 1, flags, ' ', width);
				1243	break;
				1244	}
				1245	#if MICROPY_PY_BUILTINS_FLOAT
				1246	// This is what CPython reports, so we report the same.
				1247	if (MP_OBJ_IS_TYPE(arg, &mp_type_float)) {
				1248	nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, "integer argument expected, got float"));
				1249
				1250	}
				1251	#endif
				1252	nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, "an integer is required"));
				1253	break;
				1254
				1255	case 'd':
				1256	case 'i':
				1257	case 'u':
				1258	pfenv_print_mp_int(&pfenv_vstr, arg_as_int(arg), 1, 10, 'a', flags, fill, width, prec);
				1259	break;
				1260
				1261	#if MICROPY_PY_BUILTINS_FLOAT
				1262	case 'e':
				1263	case 'E':
				1264	case 'f':
				1265	case 'F':
				1266	case 'g':
				1267	case 'G':
				1268	pfenv_print_float(&pfenv_vstr, mp_obj_get_float(arg), *str, flags, fill, width, prec);
				1269	break;
				1270	#endif
				1271
				1272	case 'o':
				1273	if (alt) {
				1274	flags \|= (PF_FLAG_SHOW_PREFIX \| PF_FLAG_SHOW_OCTAL_LETTER);
				1275	}
				1276	pfenv_print_mp_int(&pfenv_vstr, arg, 1, 8, 'a', flags, fill, width, prec);
				1277	break;
				1278
				1279	case 'r':
				1280	case 's':
				1281	{
				1282	vstr_t *arg_vstr = vstr_new();
				1283	mp_obj_print_helper((void ()(void, const char*, ...))vstr_printf,
				1284	arg_vstr, arg, *str == 'r' ? PRINT_REPR : PRINT_STR);
				1285	uint len = vstr_len(arg_vstr);
				1286	if (prec < 0) {
				1287	prec = len;
				1288	}
				1289	if (len > prec) {
				1290	len = prec;
				1291	}
				1292	pfenv_print_strn(&pfenv_vstr, vstr_str(arg_vstr), len, flags, ' ', width);
				1293	vstr_free(arg_vstr);
				1294	break;
				1295	}
				1296
				1297	case 'X':
				1298	case 'x':
				1299	pfenv_print_mp_int(&pfenv_vstr, arg, 1, 16, *str - ('X' - 'A'), flags \| alt, fill, width, prec);
				1300	break;
				1301
				1302	default:
				1303	nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_ValueError,
				1304	"unsupported format character '%c' (0x%x) at index %d",
				1305	str, str, str - start_str));
				1306	}
				1307	}
				1308
				1309	if (arg_i != n_args) {
				1310	nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, "not all arguments converted during string formatting"));
				1311	}
				1312
				1313	mp_obj_t s = mp_obj_new_str(vstr->buf, vstr->len, false);
				1314	vstr_free(vstr);
				1315	return s;
				1316	}
				1317
				1318	STATIC mp_obj_t str_replace(uint n_args, const mp_obj_t *args) {
				1319	assert(MP_OBJ_IS_STR(args[0]));
				1320
				1321	machine_int_t max_rep = -1;
				1322	if (n_args == 4) {
				1323	max_rep = mp_obj_get_int(args[3]);
				1324	if (max_rep == 0) {
				1325	return args[0];
				1326	} else if (max_rep < 0) {
				1327	max_rep = -1;
				1328	}
				1329	}
				1330
				1331	// if max_rep is still -1 by this point we will need to do all possible replacements
				1332
				1333	// check argument types
				1334
				1335	if (!MP_OBJ_IS_STR(args[1])) {
				1336	bad_implicit_conversion(args[1]);
				1337	}
				1338
				1339	if (!MP_OBJ_IS_STR(args[2])) {
				1340	bad_implicit_conversion(args[2]);
				1341	}
				1342
				1343	// extract string data
				1344
				1345	GET_STR_DATA_LEN(args[0], str, str_len);
				1346	GET_STR_DATA_LEN(args[1], old, old_len);
				1347	GET_STR_DATA_LEN(args[2], new, new_len);
				1348
				1349	// old won't exist in str if it's longer, so nothing to replace
				1350	if (old_len > str_len) {
				1351	return args[0];
				1352	}
				1353
				1354	// data for the replaced string
				1355	byte *data = NULL;
				1356	mp_obj_t replaced_str = MP_OBJ_NULL;
				1357
				1358	// do 2 passes over the string:
				1359	// first pass computes the required length of the replaced string
				1360	// second pass does the replacements
				1361	for (;;) {
				1362	machine_uint_t replaced_str_index = 0;
				1363	machine_uint_t num_replacements_done = 0;
				1364	const byte *old_occurrence;
				1365	const byte *offset_ptr = str;
				1366	machine_uint_t str_len_remain = str_len;
				1367	if (old_len == 0) {
				1368	// if old_str is empty, copy new_str to start of replaced string
				1369	// copy the replacement string
				1370	if (data != NULL) {
				1371	memcpy(data, new, new_len);
				1372	}
				1373	replaced_str_index += new_len;
				1374	num_replacements_done++;
				1375	}
				1376	while (num_replacements_done != max_rep && str_len_remain > 0 && (old_occurrence = find_subbytes(offset_ptr, str_len_remain, old, old_len, 1)) != NULL) {
				1377	if (old_len == 0) {
				1378	old_occurrence += 1;
				1379	}
				1380	// copy from just after end of last occurrence of to-be-replaced string to right before start of next occurrence
				1381	if (data != NULL) {
				1382	memcpy(data + replaced_str_index, offset_ptr, old_occurrence - offset_ptr);
				1383	}
				1384	replaced_str_index += old_occurrence - offset_ptr;
				1385	// copy the replacement string
				1386	if (data != NULL) {
				1387	memcpy(data + replaced_str_index, new, new_len);
				1388	}
				1389	replaced_str_index += new_len;
				1390	offset_ptr = old_occurrence + old_len;
				1391	str_len_remain = str + str_len - offset_ptr;
				1392	num_replacements_done++;
				1393	}
				1394
				1395	// copy from just after end of last occurrence of to-be-replaced string to end of old string
				1396	if (data != NULL) {
				1397	memcpy(data + replaced_str_index, offset_ptr, str_len_remain);
				1398	}
				1399	replaced_str_index += str_len_remain;
				1400
				1401	if (data == NULL) {
				1402	// first pass
				1403	if (num_replacements_done == 0) {
				1404	// no substr found, return original string
				1405	return args[0];
				1406	} else {
				1407	// substr found, allocate new string
				1408	replaced_str = mp_obj_str_builder_start(mp_obj_get_type(args[0]), replaced_str_index, &data);
				1409	assert(data != NULL);
				1410	}
				1411	} else {
				1412	// second pass, we are done
				1413	break;
				1414	}
				1415	}
				1416
				1417	return mp_obj_str_builder_end(replaced_str);
				1418	}
				1419
				1420	STATIC mp_obj_t str_count(uint n_args, const mp_obj_t *args) {
				1421	assert(2 <= n_args && n_args <= 4);
				1422	assert(MP_OBJ_IS_STR(args[0]));
				1423	assert(MP_OBJ_IS_STR(args[1]));
				1424
				1425	GET_STR_DATA_LEN(args[0], haystack, haystack_len);
				1426	GET_STR_DATA_LEN(args[1], needle, needle_len);
				1427
				1428	machine_uint_t start = 0;
				1429	machine_uint_t end = haystack_len;
				1430	if (n_args >= 3 && args[2] != mp_const_none) {
				1431	start = mp_get_index(&mp_type_str, haystack_len, args[2], true);
				1432	}
				1433	if (n_args >= 4 && args[3] != mp_const_none) {
				1434	end = mp_get_index(&mp_type_str, haystack_len, args[3], true);
				1435	}
				1436
				1437	// if needle_len is zero then we count each gap between characters as an occurrence
				1438	if (needle_len == 0) {
				1439	return MP_OBJ_NEW_SMALL_INT(end - start + 1);
				1440	}
				1441
				1442	// count the occurrences
				1443	machine_int_t num_occurrences = 0;
				1444	for (machine_uint_t haystack_index = start; haystack_index + needle_len <= end; haystack_index++) {
				1445	if (memcmp(&haystack[haystack_index], needle, needle_len) == 0) {
				1446	num_occurrences++;
				1447	haystack_index += needle_len - 1;
				1448	}
				1449	}
				1450
				1451	return MP_OBJ_NEW_SMALL_INT(num_occurrences);
				1452	}
				1453
				1454	STATIC mp_obj_t str_partitioner(mp_obj_t self_in, mp_obj_t arg, machine_int_t direction) {
				1455	if (!is_str_or_bytes(self_in)) {
				1456	assert(0);
				1457	}
				1458	mp_obj_type_t *self_type = mp_obj_get_type(self_in);
				1459	if (self_type != mp_obj_get_type(arg)) {
				1460	arg_type_mixup();
				1461	}
				1462
				1463	GET_STR_DATA_LEN(self_in, str, str_len);
				1464	GET_STR_DATA_LEN(arg, sep, sep_len);
				1465
				1466	if (sep_len == 0) {
				1467	nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "empty separator"));
				1468	}
				1469
				1470	mp_obj_t result[] = {MP_OBJ_NEW_QSTR(MP_QSTR_), MP_OBJ_NEW_QSTR(MP_QSTR_), MP_OBJ_NEW_QSTR(MP_QSTR_)};
				1471
				1472	if (direction > 0) {
				1473	result[0] = self_in;
				1474	} else {
				1475	result[2] = self_in;
				1476	}
				1477
				1478	const byte *position_ptr = find_subbytes(str, str_len, sep, sep_len, direction);
				1479	if (position_ptr != NULL) {
				1480	machine_uint_t position = position_ptr - str;
				1481	result[0] = mp_obj_new_str_of_type(self_type, str, position);
				1482	result[1] = arg;
				1483	result[2] = mp_obj_new_str_of_type(self_type, str + position + sep_len, str_len - position - sep_len);
				1484	}
				1485
				1486	return mp_obj_new_tuple(3, result);
				1487	}
				1488
				1489	STATIC mp_obj_t str_partition(mp_obj_t self_in, mp_obj_t arg) {
					// Delegates with direction 1; on a miss str_partitioner keeps the whole string in the first slot.
				1490	return str_partitioner(self_in, arg, 1);
				1491	}
				1492
				1493	STATIC mp_obj_t str_rpartition(mp_obj_t self_in, mp_obj_t arg) {
					// Delegates with direction -1; on a miss str_partitioner keeps the whole string in the last slot.
				1494	return str_partitioner(self_in, arg, -1);
				1495	}
				1496
				1497	// Supposedly not too critical operations, so optimize for code size
				1498	STATIC mp_obj_t str_caseconv(unichar (*op)(unichar), mp_obj_t self_in) {
				1499	GET_STR_DATA_LEN(self_in, self_data, self_len);
				1500	byte *data;
				1501	mp_obj_t s = mp_obj_str_builder_start(mp_obj_get_type(self_in), self_len, &data);
				1502	for (int i = 0; i < self_len; i++) {
				1503	data++ = op(self_data++);
				1504	}
				1505	*data = 0;
				1506	return mp_obj_str_builder_end(s);
				1507	}
				1508
				1509	STATIC mp_obj_t str_lower(mp_obj_t self_in) {
					// str.lower(): map every element through unichar_tolower.
				1510	return str_caseconv(unichar_tolower, self_in);
				1511	}
				1512
				1513	STATIC mp_obj_t str_upper(mp_obj_t self_in) {
					// str.upper(): map every element through unichar_toupper.
				1514	return str_caseconv(unichar_toupper, self_in);
				1515	}
				1516
				1517	STATIC mp_obj_t str_uni_istype(bool (*f)(unichar), mp_obj_t self_in) {
				1518	GET_STR_DATA_LEN(self_in, self_data, self_len);
				1519
				1520	if (self_len == 0) {
				1521	return mp_const_false; // default to False for empty str
				1522	}
				1523
				1524	if (f != unichar_isupper && f != unichar_islower) {
				1525	for (int i = 0; i < self_len; i++) {
				1526	if (!f(*self_data++)) {
				1527	return mp_const_false;
				1528	}
				1529	}
				1530	} else {
				1531	bool contains_alpha = false;
				1532
				1533	for (int i = 0; i < self_len; i++) { // only check alphanumeric characters
				1534	if (unichar_isalpha(*self_data++)) {
				1535	contains_alpha = true;
				1536	if (!f(*(self_data - 1))) { // -1 because we already incremented above
				1537	return mp_const_false;
				1538	}
				1539	}
				1540	}
				1541
				1542	if (!contains_alpha) {
				1543	return mp_const_false;
				1544	}
				1545	}
				1546
				1547	return mp_const_true;
				1548	}
				1549
				1550	STATIC mp_obj_t str_isspace(mp_obj_t self_in) {
					// str.isspace(): every element must satisfy unichar_isspace.
				1551	return str_uni_istype(unichar_isspace, self_in);
				1552	}
				1553
				1554	STATIC mp_obj_t str_isalpha(mp_obj_t self_in) {
					// str.isalpha(): every element must satisfy unichar_isalpha.
				1555	return str_uni_istype(unichar_isalpha, self_in);
				1556	}
				1557
				1558	STATIC mp_obj_t str_isdigit(mp_obj_t self_in) {
					// str.isdigit(): every element must satisfy unichar_isdigit.
				1559	return str_uni_istype(unichar_isdigit, self_in);
				1560	}
				1561
				1562	STATIC mp_obj_t str_isupper(mp_obj_t self_in) {
					// str.isupper(): str_uni_istype applies the predicate to alphabetic elements only.
				1563	return str_uni_istype(unichar_isupper, self_in);
				1564	}
				1565
				1566	STATIC mp_obj_t str_islower(mp_obj_t self_in) {
					// str.islower(): str_uni_istype applies the predicate to alphabetic elements only.
				1567	return str_uni_istype(unichar_islower, self_in);
				1568	}
				1569
				1570	#if MICROPY_CPYTHON_COMPAT
				1571	// These methods are superfluous in the presense of str() and bytes()
				1572	// constructors.
				1573	// TODO: should accept kwargs too
				1574	STATIC mp_obj_t bytes_decode(uint n_args, const mp_obj_t *args) {
				1575	mp_obj_t new_args[2];
				1576	if (n_args == 1) {
				1577	new_args[0] = args[0];
				1578	new_args[1] = MP_OBJ_NEW_QSTR(MP_QSTR_utf_hyphen_8);
				1579	args = new_args;
				1580	n_args++;
				1581	}
				1582	return str_make_new(NULL, n_args, 0, args);
				1583	}
				1584
				1585	// TODO: should accept kwargs too
				1586	STATIC mp_obj_t str_encode(uint n_args, const mp_obj_t *args) {
				1587	mp_obj_t new_args[2];
				1588	if (n_args == 1) {
				1589	new_args[0] = args[0];
				1590	new_args[1] = MP_OBJ_NEW_QSTR(MP_QSTR_utf_hyphen_8);
				1591	args = new_args;
				1592	n_args++;
				1593	}
				1594	return bytes_make_new(NULL, n_args, 0, args);
				1595	}
				1596	#endif
				1597
				1598	STATIC machine_int_t str_get_buffer(mp_obj_t self_in, mp_buffer_info_t *bufinfo, int flags) {
				1599	if (flags == MP_BUFFER_READ) {
				1600	GET_STR_DATA_LEN(self_in, str_data, str_len);
				1601	bufinfo->buf = (void*)str_data;
				1602	bufinfo->len = str_len;
				1603	bufinfo->typecode = 'b';
				1604	return 0;
				1605	} else {
				1606	// can't write to a string
				1607	bufinfo->buf = NULL;
				1608	bufinfo->len = 0;
				1609	bufinfo->typecode = -1;
				1610	return 1;
				1611	}
				1612	}
				1613
				1614	#if MICROPY_CPYTHON_COMPAT
					// decode/encode are only defined when MICROPY_CPYTHON_COMPAT is enabled.
				1615	STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(bytes_decode_obj, 1, 3, bytes_decode);
				1616	STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_encode_obj, 1, 3, str_encode);
				1617	#endif
					// Function objects for the method implementations; each *_obj is referenced from str_locals_dict_table.
					// NOTE(review): the two numbers given to VAR_BETWEEN look like min/max argument counts
					// (str_count asserts 2 <= n_args <= 4 and is registered with 2, 4) -- confirm against the macro.
				1618	STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_find_obj, 2, 4, str_find);
				1619	STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_rfind_obj, 2, 4, str_rfind);
				1620	STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_index_obj, 2, 4, str_index);
				1621	STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_rindex_obj, 2, 4, str_rindex);
				1622	STATIC MP_DEFINE_CONST_FUN_OBJ_2(str_join_obj, str_join);
				1623	STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_split_obj, 1, 3, str_split);
				1624	STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_rsplit_obj, 1, 3, str_rsplit);
				1625	STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_startswith_obj, 2, 3, str_startswith);
				1626	STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_endswith_obj, 2, 3, str_endswith);
				1627	STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_strip_obj, 1, 2, str_strip);
				1628	STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_lstrip_obj, 1, 2, str_lstrip);
				1629	STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_rstrip_obj, 1, 2, str_rstrip);
				1630	STATIC MP_DEFINE_CONST_FUN_OBJ_VAR(str_format_obj, 1, mp_obj_str_format);
				1631	STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_replace_obj, 3, 4, str_replace);
				1632	STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_count_obj, 2, 4, str_count);
				1633	STATIC MP_DEFINE_CONST_FUN_OBJ_2(str_partition_obj, str_partition);
				1634	STATIC MP_DEFINE_CONST_FUN_OBJ_2(str_rpartition_obj, str_rpartition);
				1635	STATIC MP_DEFINE_CONST_FUN_OBJ_1(str_lower_obj, str_lower);
				1636	STATIC MP_DEFINE_CONST_FUN_OBJ_1(str_upper_obj, str_upper);
				1637	STATIC MP_DEFINE_CONST_FUN_OBJ_1(str_isspace_obj, str_isspace);
				1638	STATIC MP_DEFINE_CONST_FUN_OBJ_1(str_isalpha_obj, str_isalpha);
				1639	STATIC MP_DEFINE_CONST_FUN_OBJ_1(str_isdigit_obj, str_isdigit);
				1640	STATIC MP_DEFINE_CONST_FUN_OBJ_1(str_isupper_obj, str_isupper);
				1641	STATIC MP_DEFINE_CONST_FUN_OBJ_1(str_islower_obj, str_islower);
				1642
				1643	STATIC const mp_map_elem_t str_locals_dict_table[] = {
					// Method table: maps each method-name qstr to its function object.
					// Both mp_type_str and mp_type_bytes use the dict built from this table.
				1644	#if MICROPY_CPYTHON_COMPAT
				1645	{ MP_OBJ_NEW_QSTR(MP_QSTR_decode), (mp_obj_t)&bytes_decode_obj },
				1646	{ MP_OBJ_NEW_QSTR(MP_QSTR_encode), (mp_obj_t)&str_encode_obj },
				1647	#endif
				1648	{ MP_OBJ_NEW_QSTR(MP_QSTR_find), (mp_obj_t)&str_find_obj },
				1649	{ MP_OBJ_NEW_QSTR(MP_QSTR_rfind), (mp_obj_t)&str_rfind_obj },
				1650	{ MP_OBJ_NEW_QSTR(MP_QSTR_index), (mp_obj_t)&str_index_obj },
				1651	{ MP_OBJ_NEW_QSTR(MP_QSTR_rindex), (mp_obj_t)&str_rindex_obj },
				1652	{ MP_OBJ_NEW_QSTR(MP_QSTR_join), (mp_obj_t)&str_join_obj },
				1653	{ MP_OBJ_NEW_QSTR(MP_QSTR_split), (mp_obj_t)&str_split_obj },
				1654	{ MP_OBJ_NEW_QSTR(MP_QSTR_rsplit), (mp_obj_t)&str_rsplit_obj },
				1655	{ MP_OBJ_NEW_QSTR(MP_QSTR_startswith), (mp_obj_t)&str_startswith_obj },
				1656	{ MP_OBJ_NEW_QSTR(MP_QSTR_endswith), (mp_obj_t)&str_endswith_obj },
				1657	{ MP_OBJ_NEW_QSTR(MP_QSTR_strip), (mp_obj_t)&str_strip_obj },
				1658	{ MP_OBJ_NEW_QSTR(MP_QSTR_lstrip), (mp_obj_t)&str_lstrip_obj },
				1659	{ MP_OBJ_NEW_QSTR(MP_QSTR_rstrip), (mp_obj_t)&str_rstrip_obj },
				1660	{ MP_OBJ_NEW_QSTR(MP_QSTR_format), (mp_obj_t)&str_format_obj },
				1661	{ MP_OBJ_NEW_QSTR(MP_QSTR_replace), (mp_obj_t)&str_replace_obj },
				1662	{ MP_OBJ_NEW_QSTR(MP_QSTR_count), (mp_obj_t)&str_count_obj },
				1663	{ MP_OBJ_NEW_QSTR(MP_QSTR_partition), (mp_obj_t)&str_partition_obj },
				1664	{ MP_OBJ_NEW_QSTR(MP_QSTR_rpartition), (mp_obj_t)&str_rpartition_obj },
				1665	{ MP_OBJ_NEW_QSTR(MP_QSTR_lower), (mp_obj_t)&str_lower_obj },
				1666	{ MP_OBJ_NEW_QSTR(MP_QSTR_upper), (mp_obj_t)&str_upper_obj },
				1667	{ MP_OBJ_NEW_QSTR(MP_QSTR_isspace), (mp_obj_t)&str_isspace_obj },
				1668	{ MP_OBJ_NEW_QSTR(MP_QSTR_isalpha), (mp_obj_t)&str_isalpha_obj },
				1669	{ MP_OBJ_NEW_QSTR(MP_QSTR_isdigit), (mp_obj_t)&str_isdigit_obj },
				1670	{ MP_OBJ_NEW_QSTR(MP_QSTR_isupper), (mp_obj_t)&str_isupper_obj },
				1671	{ MP_OBJ_NEW_QSTR(MP_QSTR_islower), (mp_obj_t)&str_islower_obj },
				1672	};
				1673
				1674	STATIC MP_DEFINE_CONST_DICT(str_locals_dict, str_locals_dict_table); // dict object referenced by .locals_dict of both type structs
				1675
				1676	const mp_obj_type_t mp_type_str = {
					// Type object for str: wires the str-specific constructor and iterator to the shared handlers.
				1677	{ &mp_type_type },
				1678	.name = MP_QSTR_str,
				1679	.print = str_print,
				1680	.make_new = str_make_new,
				1681	.binary_op = str_binary_op,
				1682	.subscr = str_subscr,
				1683	.getiter = mp_obj_new_str_iterator,
				1684	.buffer_p = { .get_buffer = str_get_buffer },
				1685	.locals_dict = (mp_obj_t)&str_locals_dict,
				1686	};
				1687
				1688	// Reuses most of methods from str
					// Only .name, .make_new and .getiter differ from mp_type_str; the method dict is shared.
				1689	const mp_obj_type_t mp_type_bytes = {
				1690	{ &mp_type_type },
				1691	.name = MP_QSTR_bytes,
				1692	.print = str_print,
				1693	.make_new = bytes_make_new,
				1694	.binary_op = str_binary_op,
				1695	.subscr = str_subscr,
				1696	.getiter = mp_obj_new_bytes_iterator,
				1697	.buffer_p = { .get_buffer = str_get_buffer },
				1698	.locals_dict = (mp_obj_t)&str_locals_dict,
				1699	};
				1700
				1701	// the zero-length bytes
					// Statically allocated bytes object; the initialisers after the type are 0, 0 and NULL.
				1702	STATIC const mp_obj_str_t empty_bytes_obj = {{&mp_type_bytes}, 0, 0, NULL};
				1703	const mp_obj_t mp_const_empty_bytes = (mp_obj_t)&empty_bytes_obj;
				1704
				1705	mp_obj_t mp_obj_str_builder_start(const mp_obj_type_t type, uint len, byte *data) {
				1706	mp_obj_str_t *o = m_new_obj(mp_obj_str_t);
				1707	o->base.type = type;
				1708	o->len = len;
				1709	o->hash = 0;
				1710	byte *p = m_new(byte, len + 1);
				1711	o->data = p;
				1712	*data = p;
				1713	return o;
				1714	}
				1715
				1716	mp_obj_t mp_obj_str_builder_end(mp_obj_t o_in) {
				1717	mp_obj_str_t *o = o_in;
				1718	o->hash = qstr_compute_hash(o->data, o->len);
				1719	byte p = (byte)o->data;
				1720	p[o->len] = '\0'; // for now we add null for compatibility with C ASCIIZ strings
				1721	return o;
				1722	}
				1723
				1724	mp_obj_t mp_obj_new_str_of_type(const mp_obj_type_t type, const byte data, uint len) {
				1725	mp_obj_str_t *o = m_new_obj(mp_obj_str_t);
				1726	o->base.type = type;
				1727	o->len = len;
				1728	if (data) {
				1729	o->hash = qstr_compute_hash(data, len);
				1730	byte *p = m_new(byte, len + 1);
				1731	o->data = p;
				1732	memcpy(p, data, len * sizeof(byte));
				1733	p[len] = '\0'; // for now we add null for compatibility with C ASCIIZ strings
				1734	}
				1735	return o;
				1736	}
				1737
				1738	mp_obj_t mp_obj_new_str(const char* data, uint len, bool make_qstr_if_not_already) {
				1739	if (make_qstr_if_not_already) {
				1740	// use existing, or make a new qstr
				1741	return MP_OBJ_NEW_QSTR(qstr_from_strn(data, len));
				1742	} else {
				1743	qstr q = qstr_find_strn(data, len);
				1744	if (q != MP_QSTR_NULL) {
				1745	// qstr with this data already exists
				1746	return MP_OBJ_NEW_QSTR(q);
				1747	} else {
				1748	// no existing qstr, don't make one
				1749	return mp_obj_new_str_of_type(&mp_type_str, (const byte*)data, len);
				1750	}
				1751	}
				1752	}
				1753
				1754	mp_obj_t mp_obj_str_intern(mp_obj_t str) {
					// Returns the qstr object for this string's data, obtained via qstr_from_strn.
				1755	GET_STR_DATA_LEN(str, data, len);
				1756	return MP_OBJ_NEW_QSTR(qstr_from_strn((const char*)data, len));
				1757	}
				1758
				1759	mp_obj_t mp_obj_new_bytes(const byte* data, uint len) {
					// Creates a bytes object from len bytes at data via mp_obj_new_str_of_type.
				1760	return mp_obj_new_str_of_type(&mp_type_bytes, data, len);
				1761	}
				1762
				1763	bool mp_obj_str_equal(mp_obj_t s1, mp_obj_t s2) {
				1764	if (MP_OBJ_IS_QSTR(s1) && MP_OBJ_IS_QSTR(s2)) {
				1765	return s1 == s2;
				1766	} else {
				1767	GET_STR_HASH(s1, h1);
				1768	GET_STR_HASH(s2, h2);
				1769	// If any of hashes is 0, it means it's not valid
				1770	if (h1 != 0 && h2 != 0 && h1 != h2) {
				1771	return false;
				1772	}
				1773	GET_STR_DATA_LEN(s1, d1, l1);
				1774	GET_STR_DATA_LEN(s2, d2, l2);
				1775	if (l1 != l2) {
				1776	return false;
				1777	}
				1778	return memcmp(d1, d2, l1) == 0;
				1779	}
				1780	}
				1781
				1782	STATIC void bad_implicit_conversion(mp_obj_t self_in) {
				1783	nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_TypeError, "Can't convert '%s' object to str implicitly", mp_obj_get_type_str(self_in)));
				1784	}
				1785
				1786	STATIC void arg_type_mixup() {
				1787	nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, "Can't mix str and bytes arguments"));
				1788	}
				1789
				1790	uint mp_obj_str_get_hash(mp_obj_t self_in) {
				1791	// TODO: This has too big overhead for hash accessor
				1792	if (MP_OBJ_IS_STR(self_in) \|\| MP_OBJ_IS_TYPE(self_in, &mp_type_bytes)) {
				1793	GET_STR_HASH(self_in, h);
				1794	return h;
				1795	} else {
				1796	bad_implicit_conversion(self_in);
				1797	}
				1798	}
				1799
				1800	uint mp_obj_str_get_len(mp_obj_t self_in) {
				1801	// TODO This has a double check for the type, one in obj.c and one here
				1802	if (MP_OBJ_IS_STR(self_in) \|\| MP_OBJ_IS_TYPE(self_in, &mp_type_bytes)) {
				1803	GET_STR_LEN(self_in, l);
				1804	return l;
				1805	} else {
				1806	bad_implicit_conversion(self_in);
				1807	}
				1808	}
				1809
				1810	// use this if you will anyway convert the string to a qstr
				1811	// will be more efficient for the case where it's already a qstr
				1812	qstr mp_obj_str_get_qstr(mp_obj_t self_in) {
				1813	if (MP_OBJ_IS_QSTR(self_in)) {
				1814	return MP_OBJ_QSTR_VALUE(self_in);
				1815	} else if (MP_OBJ_IS_TYPE(self_in, &mp_type_str)) {
				1816	mp_obj_str_t *self = self_in;
				1817	return qstr_from_strn((char*)self->data, self->len);
				1818	} else {
				1819	bad_implicit_conversion(self_in);
				1820	}
				1821	}
				1822
				1823	// only use this function if you need the str data to be zero terminated
				1824	// at the moment all strings are zero terminated to help with C ASCIIZ compatibility
				1825	const char *mp_obj_str_get_str(mp_obj_t self_in) {
				1826	if (MP_OBJ_IS_STR(self_in)) {
				1827	GET_STR_DATA_LEN(self_in, s, l);
				1828	(void)l; // len unused
				1829	return (const char*)s;
				1830	} else {
				1831	bad_implicit_conversion(self_in);
				1832	}
				1833	}
				1834
				1835	const char mp_obj_str_get_data(mp_obj_t self_in, uint len) {
				1836	if (is_str_or_bytes(self_in)) {
				1837	GET_STR_DATA_LEN(self_in, s, l);
				1838	*len = l;
				1839	return (const char*)s;
				1840	} else {
				1841	bad_implicit_conversion(self_in);
				1842	}
				1843	}
				1844
				1845	/******************************************************************************/
				1846	/* str iterator */
				1847
				1848	typedef struct _mp_obj_str_it_t {
				1849	mp_obj_base_t base;
				1850	mp_obj_t str;
				1851	machine_uint_t cur;
				1852	} mp_obj_str_it_t;
				1853
				1854	STATIC mp_obj_t str_it_iternext(mp_obj_t self_in) {
				1855	mp_obj_str_it_t *self = self_in;
				1856	GET_STR_DATA_LEN(self->str, str, len);
				1857	if (self->cur < len) {
				1858	mp_obj_t o_out = mp_obj_new_str((const char*)str + self->cur, 1, true);
				1859	self->cur += 1;
				1860	return o_out;
				1861	} else {
				1862	return MP_OBJ_STOP_ITERATION;
				1863	}
				1864	}
				1865
				1866	STATIC const mp_obj_type_t mp_type_str_it = {
				1867	{ &mp_type_type },
				1868	.name = MP_QSTR_iterator,
				1869	.getiter = mp_identity,
				1870	.iternext = str_it_iternext,
				1871	};
				1872
				1873	STATIC mp_obj_t bytes_it_iternext(mp_obj_t self_in) {
				1874	mp_obj_str_it_t *self = self_in;
				1875	GET_STR_DATA_LEN(self->str, str, len);
				1876	if (self->cur < len) {
				1877	mp_obj_t o_out = MP_OBJ_NEW_SMALL_INT((mp_small_int_t)str[self->cur]);
				1878	self->cur += 1;
				1879	return o_out;
				1880	} else {
				1881	return MP_OBJ_STOP_ITERATION;
				1882	}
				1883	}
				1884
				1885	STATIC const mp_obj_type_t mp_type_bytes_it = {
				1886	{ &mp_type_type },
				1887	.name = MP_QSTR_iterator,
				1888	.getiter = mp_identity,
				1889	.iternext = bytes_it_iternext,
				1890	};
				1891
				1892	mp_obj_t mp_obj_new_str_iterator(mp_obj_t str) {
				1893	mp_obj_str_it_t *o = m_new_obj(mp_obj_str_it_t);
				1894	o->base.type = &mp_type_str_it;
				1895	o->str = str;
				1896	o->cur = 0;
				1897	return o;
				1898	}
				1899
				1900	mp_obj_t mp_obj_new_bytes_iterator(mp_obj_t str) {
				1901	mp_obj_str_it_t *o = m_new_obj(mp_obj_str_it_t);
				1902	o->base.type = &mp_type_bytes_it;
				1903	o->str = str;
				1904	o->cur = 0;
				1905	return o;
				1906	}