objstr: Be 8-bit clean even for repr(). This allows roughly the same behavior as Python3 for non-ASCII strings. For example, print("<phrase in non-Latin script>".split()) will print a list of words, not a weird hex dump (as Python2 does). (Of course, it will print a list of words only if there are "words" in that phrase at all, separated by ASCII-compatible whitespace; that surely won't apply to every human language in existence.)

commit: 2ec38a17d4e357f8f12ee6a2643e2dd2ff7a426e [log] [tgz]
author: Paul Sokolovsky <pfalcon@users.sourceforge.net> Fri Jun 13 21:23:00 2014 +0300
committer: Paul Sokolovsky <pfalcon@users.sourceforge.net> Sat Jun 14 01:21:13 2014 +0300
tree: 5bc282117e32cb70604b617d5692900529cc9ded
parent: e9036c295ca1240946c122044e86ba8b569184e1 [diff] [blame]
diff --git a/py/objstr.c b/py/objstr.c
index 6656090..f9cc273 100644
--- a/py/objstr.c
+++ b/py/objstr.c

@@ -64,7 +64,8 @@
 /******************************************************************************/
 /* str                                                                        */
 
-void mp_str_print_quoted(void (*print)(void *env, const char *fmt, ...), void *env, const byte *str_data, uint str_len) {
+void mp_str_print_quoted(void (*print)(void *env, const char *fmt, ...), void *env,
+                         const byte *str_data, uint str_len, bool is_bytes) {
     // this escapes characters, but it will be very slow to print (calling print many times)
     bool has_single_quote = false;
     bool has_double_quote = false;
@@ -85,7 +86,10 @@
             print(env, "\\%c", quote_char);
         } else if (*s == '\\') {
             print(env, "\\\\");
-        } else if (32 <= *s && *s <= 126) {
+        } else if (*s >= 0x20 && *s != 0x7f && (!is_bytes || *s < 0x80)) {
+            // In strings, anything which is not ascii control character
+            // is printed as is, this includes characters in range 0x80-0xff
+            // (which can be non-Latin letters, etc.)
             print(env, "%c", *s);
         } else if (*s == '\n') {
             print(env, "\\n");
@@ -109,7 +113,7 @@
         if (is_bytes) {
             print(env, "b");
         }
-        mp_str_print_quoted(print, env, str_data, str_len);
+        mp_str_print_quoted(print, env, str_data, str_len, is_bytes);
     }
 }
commit	2ec38a17d4e357f8f12ee6a2643e2dd2ff7a426e	[log] [tgz]
author	Paul Sokolovsky <pfalcon@users.sourceforge.net>	Fri Jun 13 21:23:00 2014 +0300
committer	Paul Sokolovsky <pfalcon@users.sourceforge.net>	Sat Jun 14 01:21:13 2014 +0300
tree	5bc282117e32cb70604b617d5692900529cc9ded
parent	e9036c295ca1240946c122044e86ba8b569184e1 [diff] [blame]