blob: 3812c253fc3e3788b72fbf75a0e2e29f4c0f78cd [file] [log] [blame]
/*
 * "$Id: mxml-file.c 455 2014-01-05 03:28:03Z msweet $"
 *
 * File loading code for Mini-XML, a small XML-like file parsing library.
 *
 * Copyright 2003-2014 by Michael R Sweet.
 *
 * These coded instructions, statements, and computer programs are the
 * property of Michael R Sweet and are protected by Federal copyright
 * law. Distribution and use rights are outlined in the file "COPYING"
 * which should have been included with this file. If this file is
 * missing or damaged, see the license at:
 *
 * http://www.msweet.org/projects.php/Mini-XML
 */
16
/*** This file modified by ARM on 25 Aug 2014 to avoid pointer overflow when checking if the write position is beyond the end of the buffer in mxmlSaveString and mxml_string_putc ***/
18
Jon Medhurstaaf37a32013-06-11 12:10:56 +010019/*
20 * Include necessary headers...
21 */
22
23#ifndef WIN32
24# include <unistd.h>
25#endif /* !WIN32 */
26#include "mxml-private.h"
27
28
/*
 * Character encodings selected through the "encoding" argument of the
 * character-reading callbacks...
 */

#define ENCODE_UTF8     0               /* UTF-8 */
#define ENCODE_UTF16BE  1               /* UTF-16 Big-Endian */
#define ENCODE_UTF16LE  2               /* UTF-16 Little-Endian */
36
37
/*
 * Macro to test for a bad XML character: any value below the space
 * character other than newline, carriage return, and tab.
 *
 * The argument is evaluated more than once, so it must not have side
 * effects.
 */

#define mxml_bad_char(ch) ((ch) < ' ' && !((ch) == '\n' || (ch) == '\r' || (ch) == '\t'))
43
44
/*
 * Types and structures...
 */

/* Reads one character from a source; receives the source and encoding */
typedef int (*_mxml_getc_cb_t)(void *, int *);

/* Writes one character to a destination; receives the character and target */
typedef int (*_mxml_putc_cb_t)(int, void *);

typedef struct _mxml_fdbuf_s            /**** File descriptor buffer ****/
{
  int           fd;                     /* File descriptor */
  unsigned char *current;               /* Current position in buffer */
  unsigned char *end;                   /* End of buffer */
  unsigned char buffer[8192];           /* Character buffer */
} _mxml_fdbuf_t;
59
60
61/*
62 * Local functions...
63 */
64
65static int mxml_add_char(int ch, char **ptr, char **buffer,
66 int *bufsize);
67static int mxml_fd_getc(void *p, int *encoding);
68static int mxml_fd_putc(int ch, void *p);
69static int mxml_fd_read(_mxml_fdbuf_t *buf);
70static int mxml_fd_write(_mxml_fdbuf_t *buf);
71static int mxml_file_getc(void *p, int *encoding);
72static int mxml_file_putc(int ch, void *p);
73static int mxml_get_entity(mxml_node_t *parent, void *p,
74 int *encoding,
75 _mxml_getc_cb_t getc_cb);
/*
 * 'mxml_isspace()' - Test for an XML whitespace character (space, tab,
 *                    carriage return, or newline).
 */

static inline int                       /* O - 1 if whitespace, 0 otherwise */
mxml_isspace(int ch)                    /* I - Character to test */
{
  switch (ch)
  {
    case ' ' :
    case '\t' :
    case '\r' :
    case '\n' :
        return (1);

    default :
        return (0);
  }
}
81static mxml_node_t *mxml_load_data(mxml_node_t *top, void *p,
82 mxml_load_cb_t cb,
83 _mxml_getc_cb_t getc_cb,
84 mxml_sax_cb_t sax_cb, void *sax_data);
85static int mxml_parse_element(mxml_node_t *node, void *p,
86 int *encoding,
87 _mxml_getc_cb_t getc_cb);
88static int mxml_string_getc(void *p, int *encoding);
89static int mxml_string_putc(int ch, void *p);
90static int mxml_write_name(const char *s, void *p,
91 _mxml_putc_cb_t putc_cb);
92static int mxml_write_node(mxml_node_t *node, void *p,
93 mxml_save_cb_t cb, int col,
94 _mxml_putc_cb_t putc_cb,
95 _mxml_global_t *global);
96static int mxml_write_string(const char *s, void *p,
97 _mxml_putc_cb_t putc_cb);
Jon Medhurst96b56152014-10-30 18:01:15 +000098static int mxml_write_ws(mxml_node_t *node, void *p,
Jon Medhurstaaf37a32013-06-11 12:10:56 +010099 mxml_save_cb_t cb, int ws,
100 int col, _mxml_putc_cb_t putc_cb);
101
102
103/*
104 * 'mxmlLoadFd()' - Load a file descriptor into an XML node tree.
105 *
106 * The nodes in the specified file are added to the specified top node.
107 * If no top node is provided, the XML file MUST be well-formed with a
108 * single parent node like <?xml> for the entire file. The callback
109 * function returns the value type that should be used for child nodes.
110 * If MXML_NO_CALLBACK is specified then all child nodes will be either
111 * MXML_ELEMENT or MXML_TEXT nodes.
112 *
113 * The constants MXML_INTEGER_CALLBACK, MXML_OPAQUE_CALLBACK,
114 * MXML_REAL_CALLBACK, and MXML_TEXT_CALLBACK are defined for loading
115 * child nodes of the specified type.
116 */
117
118mxml_node_t * /* O - First node or NULL if the file could not be read. */
119mxmlLoadFd(mxml_node_t *top, /* I - Top node */
120 int fd, /* I - File descriptor to read from */
121 mxml_load_cb_t cb) /* I - Callback function or MXML_NO_CALLBACK */
122{
123 _mxml_fdbuf_t buf; /* File descriptor buffer */
124
125
126 /*
127 * Initialize the file descriptor buffer...
128 */
129
130 buf.fd = fd;
131 buf.current = buf.buffer;
132 buf.end = buf.buffer;
133
134 /*
135 * Read the XML data...
136 */
137
138 return (mxml_load_data(top, &buf, cb, mxml_fd_getc, MXML_NO_CALLBACK, NULL));
139}
140
141
142/*
143 * 'mxmlLoadFile()' - Load a file into an XML node tree.
144 *
145 * The nodes in the specified file are added to the specified top node.
146 * If no top node is provided, the XML file MUST be well-formed with a
147 * single parent node like <?xml> for the entire file. The callback
148 * function returns the value type that should be used for child nodes.
149 * If MXML_NO_CALLBACK is specified then all child nodes will be either
150 * MXML_ELEMENT or MXML_TEXT nodes.
151 *
152 * The constants MXML_INTEGER_CALLBACK, MXML_OPAQUE_CALLBACK,
153 * MXML_REAL_CALLBACK, and MXML_TEXT_CALLBACK are defined for loading
154 * child nodes of the specified type.
155 */
156
157mxml_node_t * /* O - First node or NULL if the file could not be read. */
158mxmlLoadFile(mxml_node_t *top, /* I - Top node */
159 FILE *fp, /* I - File to read from */
160 mxml_load_cb_t cb) /* I - Callback function or MXML_NO_CALLBACK */
161{
162 /*
163 * Read the XML data...
164 */
165
166 return (mxml_load_data(top, fp, cb, mxml_file_getc, MXML_NO_CALLBACK, NULL));
167}
168
169
170/*
171 * 'mxmlLoadString()' - Load a string into an XML node tree.
172 *
173 * The nodes in the specified string are added to the specified top node.
174 * If no top node is provided, the XML string MUST be well-formed with a
175 * single parent node like <?xml> for the entire string. The callback
176 * function returns the value type that should be used for child nodes.
177 * If MXML_NO_CALLBACK is specified then all child nodes will be either
178 * MXML_ELEMENT or MXML_TEXT nodes.
179 *
180 * The constants MXML_INTEGER_CALLBACK, MXML_OPAQUE_CALLBACK,
181 * MXML_REAL_CALLBACK, and MXML_TEXT_CALLBACK are defined for loading
182 * child nodes of the specified type.
183 */
184
185mxml_node_t * /* O - First node or NULL if the string has errors. */
186mxmlLoadString(mxml_node_t *top, /* I - Top node */
187 const char *s, /* I - String to load */
188 mxml_load_cb_t cb) /* I - Callback function or MXML_NO_CALLBACK */
189{
190 /*
191 * Read the XML data...
192 */
193
194 return (mxml_load_data(top, (void *)&s, cb, mxml_string_getc, MXML_NO_CALLBACK,
195 NULL));
196}
197
198
199/*
200 * 'mxmlSaveAllocString()' - Save an XML tree to an allocated string.
201 *
202 * This function returns a pointer to a string containing the textual
203 * representation of the XML node tree. The string should be freed
204 * using the free() function when you are done with it. NULL is returned
205 * if the node would produce an empty string or if the string cannot be
206 * allocated.
207 *
208 * The callback argument specifies a function that returns a whitespace
209 * string or NULL before and after each element. If MXML_NO_CALLBACK
210 * is specified, whitespace will only be added before MXML_TEXT nodes
211 * with leading whitespace and before attribute names inside opening
212 * element tags.
213 */
214
215char * /* O - Allocated string or NULL */
216mxmlSaveAllocString(
217 mxml_node_t *node, /* I - Node to write */
218 mxml_save_cb_t cb) /* I - Whitespace callback or MXML_NO_CALLBACK */
219{
220 int bytes; /* Required bytes */
221 char buffer[8192]; /* Temporary buffer */
222 char *s; /* Allocated string */
223
224
225 /*
226 * Write the node to the temporary buffer...
227 */
228
229 bytes = mxmlSaveString(node, buffer, sizeof(buffer), cb);
230
231 if (bytes <= 0)
232 return (NULL);
233
234 if (bytes < (int)(sizeof(buffer) - 1))
235 {
236 /*
237 * Node fit inside the buffer, so just duplicate that string and
238 * return...
239 */
240
241 return (strdup(buffer));
242 }
243
244 /*
245 * Allocate a buffer of the required size and save the node to the
246 * new buffer...
247 */
248
249 if ((s = malloc(bytes + 1)) == NULL)
250 return (NULL);
251
252 mxmlSaveString(node, s, bytes + 1, cb);
253
254 /*
255 * Return the allocated string...
256 */
257
258 return (s);
259}
260
261
262/*
263 * 'mxmlSaveFd()' - Save an XML tree to a file descriptor.
264 *
265 * The callback argument specifies a function that returns a whitespace
266 * string or NULL before and after each element. If MXML_NO_CALLBACK
267 * is specified, whitespace will only be added before MXML_TEXT nodes
268 * with leading whitespace and before attribute names inside opening
269 * element tags.
270 */
271
272int /* O - 0 on success, -1 on error. */
273mxmlSaveFd(mxml_node_t *node, /* I - Node to write */
274 int fd, /* I - File descriptor to write to */
275 mxml_save_cb_t cb) /* I - Whitespace callback or MXML_NO_CALLBACK */
276{
277 int col; /* Final column */
278 _mxml_fdbuf_t buf; /* File descriptor buffer */
279 _mxml_global_t *global = _mxml_global();
280 /* Global data */
281
282
283 /*
284 * Initialize the file descriptor buffer...
285 */
286
287 buf.fd = fd;
288 buf.current = buf.buffer;
289 buf.end = buf.buffer + sizeof(buf.buffer);
290
291 /*
292 * Write the node...
293 */
294
295 if ((col = mxml_write_node(node, &buf, cb, 0, mxml_fd_putc, global)) < 0)
296 return (-1);
297
298 if (col > 0)
299 if (mxml_fd_putc('\n', &buf) < 0)
300 return (-1);
301
302 /*
303 * Flush and return...
304 */
305
306 return (mxml_fd_write(&buf));
307}
308
309
310/*
311 * 'mxmlSaveFile()' - Save an XML tree to a file.
312 *
313 * The callback argument specifies a function that returns a whitespace
314 * string or NULL before and after each element. If MXML_NO_CALLBACK
315 * is specified, whitespace will only be added before MXML_TEXT nodes
316 * with leading whitespace and before attribute names inside opening
317 * element tags.
318 */
319
320int /* O - 0 on success, -1 on error. */
321mxmlSaveFile(mxml_node_t *node, /* I - Node to write */
322 FILE *fp, /* I - File to write to */
323 mxml_save_cb_t cb) /* I - Whitespace callback or MXML_NO_CALLBACK */
324{
325 int col; /* Final column */
326 _mxml_global_t *global = _mxml_global();
327 /* Global data */
328
329
330 /*
331 * Write the node...
332 */
333
334 if ((col = mxml_write_node(node, fp, cb, 0, mxml_file_putc, global)) < 0)
335 return (-1);
336
337 if (col > 0)
338 if (putc('\n', fp) < 0)
339 return (-1);
340
341 /*
342 * Return 0 (success)...
343 */
344
345 return (0);
346}
347
348
349/*
350 * 'mxmlSaveString()' - Save an XML node tree to a string.
351 *
352 * This function returns the total number of bytes that would be
353 * required for the string but only copies (bufsize - 1) characters
354 * into the specified buffer.
355 *
356 * The callback argument specifies a function that returns a whitespace
357 * string or NULL before and after each element. If MXML_NO_CALLBACK
358 * is specified, whitespace will only be added before MXML_TEXT nodes
359 * with leading whitespace and before attribute names inside opening
360 * element tags.
361 */
362
363int /* O - Size of string */
364mxmlSaveString(mxml_node_t *node, /* I - Node to write */
365 char *buffer, /* I - String buffer */
366 int bufsize, /* I - Size of string buffer */
367 mxml_save_cb_t cb) /* I - Whitespace callback or MXML_NO_CALLBACK */
368{
369 int col; /* Final column */
Jon Medhurst96b56152014-10-30 18:01:15 +0000370 char *ptr[3]; /* Pointers for putc_cb */
Jon Medhurstaaf37a32013-06-11 12:10:56 +0100371 _mxml_global_t *global = _mxml_global();
372 /* Global data */
373
374
375 /*
376 * Write the node...
377 */
378
379 ptr[0] = buffer;
380 ptr[1] = buffer + bufsize;
Jon Medhurst96b56152014-10-30 18:01:15 +0000381 ptr[2] = 0;
Jon Medhurstaaf37a32013-06-11 12:10:56 +0100382
383 if ((col = mxml_write_node(node, ptr, cb, 0, mxml_string_putc, global)) < 0)
384 return (-1);
385
386 if (col > 0)
387 mxml_string_putc('\n', ptr);
388
389 /*
390 * Nul-terminate the buffer...
391 */
392
Jon Medhurst96b56152014-10-30 18:01:15 +0000393 if (ptr[2] != 0)
Jon Medhurstaaf37a32013-06-11 12:10:56 +0100394 buffer[bufsize - 1] = '\0';
395 else
396 ptr[0][0] = '\0';
397
398 /*
399 * Return the number of characters...
400 */
401
402 return (ptr[0] - buffer);
403}
404
405
406/*
407 * 'mxmlSAXLoadFd()' - Load a file descriptor into an XML node tree
408 * using a SAX callback.
409 *
410 * The nodes in the specified file are added to the specified top node.
411 * If no top node is provided, the XML file MUST be well-formed with a
412 * single parent node like <?xml> for the entire file. The callback
413 * function returns the value type that should be used for child nodes.
414 * If MXML_NO_CALLBACK is specified then all child nodes will be either
415 * MXML_ELEMENT or MXML_TEXT nodes.
416 *
417 * The constants MXML_INTEGER_CALLBACK, MXML_OPAQUE_CALLBACK,
418 * MXML_REAL_CALLBACK, and MXML_TEXT_CALLBACK are defined for loading
419 * child nodes of the specified type.
420 *
421 * The SAX callback must call mxmlRetain() for any nodes that need to
422 * be kept for later use. Otherwise, nodes are deleted when the parent
423 * node is closed or after each data, comment, CDATA, or directive node.
424 *
425 * @since Mini-XML 2.3@
426 */
427
428mxml_node_t * /* O - First node or NULL if the file could not be read. */
429mxmlSAXLoadFd(mxml_node_t *top, /* I - Top node */
430 int fd, /* I - File descriptor to read from */
431 mxml_load_cb_t cb, /* I - Callback function or MXML_NO_CALLBACK */
432 mxml_sax_cb_t sax_cb, /* I - SAX callback or MXML_NO_CALLBACK */
433 void *sax_data) /* I - SAX user data */
434{
435 _mxml_fdbuf_t buf; /* File descriptor buffer */
436
437
438 /*
439 * Initialize the file descriptor buffer...
440 */
441
442 buf.fd = fd;
443 buf.current = buf.buffer;
444 buf.end = buf.buffer;
445
446 /*
447 * Read the XML data...
448 */
449
450 return (mxml_load_data(top, &buf, cb, mxml_fd_getc, sax_cb, sax_data));
451}
452
453
454/*
455 * 'mxmlSAXLoadFile()' - Load a file into an XML node tree
456 * using a SAX callback.
457 *
458 * The nodes in the specified file are added to the specified top node.
459 * If no top node is provided, the XML file MUST be well-formed with a
460 * single parent node like <?xml> for the entire file. The callback
461 * function returns the value type that should be used for child nodes.
462 * If MXML_NO_CALLBACK is specified then all child nodes will be either
463 * MXML_ELEMENT or MXML_TEXT nodes.
464 *
465 * The constants MXML_INTEGER_CALLBACK, MXML_OPAQUE_CALLBACK,
466 * MXML_REAL_CALLBACK, and MXML_TEXT_CALLBACK are defined for loading
467 * child nodes of the specified type.
468 *
469 * The SAX callback must call mxmlRetain() for any nodes that need to
470 * be kept for later use. Otherwise, nodes are deleted when the parent
471 * node is closed or after each data, comment, CDATA, or directive node.
472 *
473 * @since Mini-XML 2.3@
474 */
475
476mxml_node_t * /* O - First node or NULL if the file could not be read. */
477mxmlSAXLoadFile(
478 mxml_node_t *top, /* I - Top node */
479 FILE *fp, /* I - File to read from */
480 mxml_load_cb_t cb, /* I - Callback function or MXML_NO_CALLBACK */
481 mxml_sax_cb_t sax_cb, /* I - SAX callback or MXML_NO_CALLBACK */
482 void *sax_data) /* I - SAX user data */
483{
484 /*
485 * Read the XML data...
486 */
487
488 return (mxml_load_data(top, fp, cb, mxml_file_getc, sax_cb, sax_data));
489}
490
491
492/*
493 * 'mxmlSAXLoadString()' - Load a string into an XML node tree
494 * using a SAX callback.
495 *
496 * The nodes in the specified string are added to the specified top node.
497 * If no top node is provided, the XML string MUST be well-formed with a
498 * single parent node like <?xml> for the entire string. The callback
499 * function returns the value type that should be used for child nodes.
500 * If MXML_NO_CALLBACK is specified then all child nodes will be either
501 * MXML_ELEMENT or MXML_TEXT nodes.
502 *
503 * The constants MXML_INTEGER_CALLBACK, MXML_OPAQUE_CALLBACK,
504 * MXML_REAL_CALLBACK, and MXML_TEXT_CALLBACK are defined for loading
505 * child nodes of the specified type.
506 *
507 * The SAX callback must call mxmlRetain() for any nodes that need to
508 * be kept for later use. Otherwise, nodes are deleted when the parent
509 * node is closed or after each data, comment, CDATA, or directive node.
510 *
511 * @since Mini-XML 2.3@
512 */
513
514mxml_node_t * /* O - First node or NULL if the string has errors. */
515mxmlSAXLoadString(
516 mxml_node_t *top, /* I - Top node */
517 const char *s, /* I - String to load */
518 mxml_load_cb_t cb, /* I - Callback function or MXML_NO_CALLBACK */
519 mxml_sax_cb_t sax_cb, /* I - SAX callback or MXML_NO_CALLBACK */
520 void *sax_data) /* I - SAX user data */
521{
522 /*
523 * Read the XML data...
524 */
525
526 return (mxml_load_data(top, (void *)&s, cb, mxml_string_getc, sax_cb, sax_data));
527}
528
529
530/*
531 * 'mxmlSetCustomHandlers()' - Set the handling functions for custom data.
532 *
533 * The load function accepts a node pointer and a data string and must
534 * return 0 on success and non-zero on error.
535 *
536 * The save function accepts a node pointer and must return a malloc'd
537 * string on success and NULL on error.
Jon Medhurst96b56152014-10-30 18:01:15 +0000538 *
Jon Medhurstaaf37a32013-06-11 12:10:56 +0100539 */
540
541void
542mxmlSetCustomHandlers(
543 mxml_custom_load_cb_t load, /* I - Load function */
544 mxml_custom_save_cb_t save) /* I - Save function */
545{
546 _mxml_global_t *global = _mxml_global();
547 /* Global data */
548
549
550 global->custom_load_cb = load;
551 global->custom_save_cb = save;
552}
553
554
555/*
556 * 'mxmlSetErrorCallback()' - Set the error message callback.
557 */
558
559void
560mxmlSetErrorCallback(mxml_error_cb_t cb)/* I - Error callback function */
561{
562 _mxml_global_t *global = _mxml_global();
563 /* Global data */
564
565
566 global->error_cb = cb;
567}
568
569
570/*
571 * 'mxmlSetWrapMargin()' - Set the wrap margin when saving XML data.
572 *
573 * Wrapping is disabled when "column" is 0.
574 *
575 * @since Mini-XML 2.3@
576 */
577
578void
579mxmlSetWrapMargin(int column) /* I - Column for wrapping, 0 to disable wrapping */
580{
581 _mxml_global_t *global = _mxml_global();
582 /* Global data */
583
584
585 global->wrap = column;
586}
587
588
/*
 * 'mxml_add_char()' - Add a character to a buffer, expanding as needed.
 *
 * The character is stored as UTF-8 (1 to 4 bytes).  The buffer is grown
 * whenever fewer than 4 bytes remain after the current position, doubling
 * while it is smaller than 1024 bytes and adding 1024 bytes after that.
 *
 * If the buffer cannot be grown it is freed, "*buffer" and "*bufptr" are
 * set to NULL so the caller is not left holding dangling pointers (a
 * later free() of "*buffer" is then harmless), and -1 is returned.
 */

static int                              /* O - 0 on success, -1 on error */
mxml_add_char(int  ch,                  /* I - Character to add */
              char **bufptr,            /* IO - Current position in buffer */
              char **buffer,            /* IO - Current buffer */
              int  *bufsize)            /* IO - Current buffer size */
{
  char  *newbuffer;                     /* New buffer value */


  if (*bufptr >= (*buffer + *bufsize - 4))
  {
   /*
    * Increase the size of the buffer...
    */

    if (*bufsize < 1024)
      (*bufsize) *= 2;
    else
      (*bufsize) += 1024;

    if ((newbuffer = realloc(*buffer, *bufsize)) == NULL)
    {
     /*
      * realloc() leaves the old block allocated on failure; release it and
      * clear the caller's pointers so it cannot be freed or used again...
      */

      free(*buffer);

      *buffer = NULL;
      *bufptr = NULL;

      mxml_error("Unable to expand string buffer to %d bytes!", *bufsize);

      return (-1);
    }

   /*
    * Re-base the write position on the (possibly moved) buffer...
    */

    *bufptr = newbuffer + (*bufptr - *buffer);
    *buffer = newbuffer;
  }

  if (ch < 0x80)
  {
   /*
    * Single byte ASCII...
    */

    *(*bufptr)++ = (char)ch;
  }
  else if (ch < 0x800)
  {
   /*
    * Two-byte UTF-8...
    */

    *(*bufptr)++ = (char)(0xc0 | (ch >> 6));
    *(*bufptr)++ = (char)(0x80 | (ch & 0x3f));
  }
  else if (ch < 0x10000)
  {
   /*
    * Three-byte UTF-8...
    */

    *(*bufptr)++ = (char)(0xe0 | (ch >> 12));
    *(*bufptr)++ = (char)(0x80 | ((ch >> 6) & 0x3f));
    *(*bufptr)++ = (char)(0x80 | (ch & 0x3f));
  }
  else
  {
   /*
    * Four-byte UTF-8...
    */

    *(*bufptr)++ = (char)(0xf0 | (ch >> 18));
    *(*bufptr)++ = (char)(0x80 | ((ch >> 12) & 0x3f));
    *(*bufptr)++ = (char)(0x80 | ((ch >> 6) & 0x3f));
    *(*bufptr)++ = (char)(0x80 | (ch & 0x3f));
  }

  return (0);
}
667
668
669/*
670 * 'mxml_fd_getc()' - Read a character from a file descriptor.
671 */
672
673static int /* O - Character or EOF */
674mxml_fd_getc(void *p, /* I - File descriptor buffer */
675 int *encoding) /* IO - Encoding */
676{
677 _mxml_fdbuf_t *buf; /* File descriptor buffer */
678 int ch, /* Current character */
679 temp; /* Temporary character */
680
681
682 /*
683 * Grab the next character in the buffer...
684 */
685
686 buf = (_mxml_fdbuf_t *)p;
687
688 if (buf->current >= buf->end)
689 if (mxml_fd_read(buf) < 0)
690 return (EOF);
691
692 ch = *(buf->current)++;
693
694 switch (*encoding)
695 {
696 case ENCODE_UTF8 :
697 /*
698 * Got a UTF-8 character; convert UTF-8 to Unicode and return...
699 */
700
701 if (!(ch & 0x80))
702 {
703#if DEBUG > 1
704 printf("mxml_fd_getc: %c (0x%04x)\n", ch < ' ' ? '.' : ch, ch);
705#endif /* DEBUG > 1 */
706
707 if (mxml_bad_char(ch))
708 {
709 mxml_error("Bad control character 0x%02x not allowed by XML standard!",
710 ch);
711 return (EOF);
712 }
713
714 return (ch);
715 }
716 else if (ch == 0xfe)
717 {
718 /*
719 * UTF-16 big-endian BOM?
720 */
721
722 if (buf->current >= buf->end)
723 if (mxml_fd_read(buf) < 0)
724 return (EOF);
725
726 ch = *(buf->current)++;
Jon Medhurst96b56152014-10-30 18:01:15 +0000727
Jon Medhurstaaf37a32013-06-11 12:10:56 +0100728 if (ch != 0xff)
729 return (EOF);
730
731 *encoding = ENCODE_UTF16BE;
732
733 return (mxml_fd_getc(p, encoding));
734 }
735 else if (ch == 0xff)
736 {
737 /*
738 * UTF-16 little-endian BOM?
739 */
740
741 if (buf->current >= buf->end)
742 if (mxml_fd_read(buf) < 0)
743 return (EOF);
744
745 ch = *(buf->current)++;
Jon Medhurst96b56152014-10-30 18:01:15 +0000746
Jon Medhurstaaf37a32013-06-11 12:10:56 +0100747 if (ch != 0xfe)
748 return (EOF);
749
750 *encoding = ENCODE_UTF16LE;
751
752 return (mxml_fd_getc(p, encoding));
753 }
754 else if ((ch & 0xe0) == 0xc0)
755 {
756 /*
757 * Two-byte value...
758 */
759
760 if (buf->current >= buf->end)
761 if (mxml_fd_read(buf) < 0)
762 return (EOF);
763
764 temp = *(buf->current)++;
765
766 if ((temp & 0xc0) != 0x80)
767 return (EOF);
768
769 ch = ((ch & 0x1f) << 6) | (temp & 0x3f);
770
771 if (ch < 0x80)
772 {
773 mxml_error("Invalid UTF-8 sequence for character 0x%04x!", ch);
774 return (EOF);
775 }
776 }
777 else if ((ch & 0xf0) == 0xe0)
778 {
779 /*
780 * Three-byte value...
781 */
782
783 if (buf->current >= buf->end)
784 if (mxml_fd_read(buf) < 0)
785 return (EOF);
786
787 temp = *(buf->current)++;
788
789 if ((temp & 0xc0) != 0x80)
790 return (EOF);
791
792 ch = ((ch & 0x0f) << 6) | (temp & 0x3f);
793
794 if (buf->current >= buf->end)
795 if (mxml_fd_read(buf) < 0)
796 return (EOF);
797
798 temp = *(buf->current)++;
799
800 if ((temp & 0xc0) != 0x80)
801 return (EOF);
802
803 ch = (ch << 6) | (temp & 0x3f);
804
805 if (ch < 0x800)
806 {
807 mxml_error("Invalid UTF-8 sequence for character 0x%04x!", ch);
808 return (EOF);
809 }
810
811 /*
812 * Ignore (strip) Byte Order Mark (BOM)...
813 */
814
815 if (ch == 0xfeff)
816 return (mxml_fd_getc(p, encoding));
817 }
818 else if ((ch & 0xf8) == 0xf0)
819 {
820 /*
821 * Four-byte value...
822 */
823
824 if (buf->current >= buf->end)
825 if (mxml_fd_read(buf) < 0)
826 return (EOF);
827
828 temp = *(buf->current)++;
829
830 if ((temp & 0xc0) != 0x80)
831 return (EOF);
832
833 ch = ((ch & 0x07) << 6) | (temp & 0x3f);
834
835 if (buf->current >= buf->end)
836 if (mxml_fd_read(buf) < 0)
837 return (EOF);
838
839 temp = *(buf->current)++;
840
841 if ((temp & 0xc0) != 0x80)
842 return (EOF);
843
844 ch = (ch << 6) | (temp & 0x3f);
845
846 if (buf->current >= buf->end)
847 if (mxml_fd_read(buf) < 0)
848 return (EOF);
849
850 temp = *(buf->current)++;
851
852 if ((temp & 0xc0) != 0x80)
853 return (EOF);
854
855 ch = (ch << 6) | (temp & 0x3f);
856
857 if (ch < 0x10000)
858 {
859 mxml_error("Invalid UTF-8 sequence for character 0x%04x!", ch);
860 return (EOF);
861 }
862 }
863 else
864 return (EOF);
865 break;
866
867 case ENCODE_UTF16BE :
868 /*
869 * Read UTF-16 big-endian char...
870 */
871
872 if (buf->current >= buf->end)
873 if (mxml_fd_read(buf) < 0)
874 return (EOF);
875
876 temp = *(buf->current)++;
877
878 ch = (ch << 8) | temp;
879
880 if (mxml_bad_char(ch))
881 {
882 mxml_error("Bad control character 0x%02x not allowed by XML standard!",
883 ch);
884 return (EOF);
885 }
886 else if (ch >= 0xd800 && ch <= 0xdbff)
887 {
888 /*
889 * Multi-word UTF-16 char...
890 */
891
892 int lch;
893
894 if (buf->current >= buf->end)
895 if (mxml_fd_read(buf) < 0)
896 return (EOF);
897
898 lch = *(buf->current)++;
899
900 if (buf->current >= buf->end)
901 if (mxml_fd_read(buf) < 0)
902 return (EOF);
903
904 temp = *(buf->current)++;
905
906 lch = (lch << 8) | temp;
907
908 if (lch < 0xdc00 || lch >= 0xdfff)
909 return (EOF);
910
911 ch = (((ch & 0x3ff) << 10) | (lch & 0x3ff)) + 0x10000;
912 }
913 break;
914
915 case ENCODE_UTF16LE :
916 /*
917 * Read UTF-16 little-endian char...
918 */
919
920 if (buf->current >= buf->end)
921 if (mxml_fd_read(buf) < 0)
922 return (EOF);
923
924 temp = *(buf->current)++;
925
926 ch |= (temp << 8);
927
928 if (mxml_bad_char(ch))
929 {
930 mxml_error("Bad control character 0x%02x not allowed by XML standard!",
931 ch);
932 return (EOF);
933 }
934 else if (ch >= 0xd800 && ch <= 0xdbff)
935 {
936 /*
937 * Multi-word UTF-16 char...
938 */
939
940 int lch;
941
942 if (buf->current >= buf->end)
943 if (mxml_fd_read(buf) < 0)
944 return (EOF);
945
946 lch = *(buf->current)++;
947
948 if (buf->current >= buf->end)
949 if (mxml_fd_read(buf) < 0)
950 return (EOF);
951
952 temp = *(buf->current)++;
953
954 lch |= (temp << 8);
955
956 if (lch < 0xdc00 || lch >= 0xdfff)
957 return (EOF);
958
959 ch = (((ch & 0x3ff) << 10) | (lch & 0x3ff)) + 0x10000;
960 }
961 break;
962 }
963
964#if DEBUG > 1
965 printf("mxml_fd_getc: %c (0x%04x)\n", ch < ' ' ? '.' : ch, ch);
966#endif /* DEBUG > 1 */
967
968 return (ch);
969}
970
971
972/*
973 * 'mxml_fd_putc()' - Write a character to a file descriptor.
974 */
975
976static int /* O - 0 on success, -1 on error */
977mxml_fd_putc(int ch, /* I - Character */
978 void *p) /* I - File descriptor buffer */
979{
980 _mxml_fdbuf_t *buf; /* File descriptor buffer */
981
982
983 /*
984 * Flush the write buffer as needed...
985 */
986
987 buf = (_mxml_fdbuf_t *)p;
988
989 if (buf->current >= buf->end)
990 if (mxml_fd_write(buf) < 0)
991 return (-1);
992
993 *(buf->current)++ = ch;
994
995 /*
996 * Return successfully...
997 */
998
999 return (0);
1000}
1001
1002
1003/*
1004 * 'mxml_fd_read()' - Read a buffer of data from a file descriptor.
1005 */
1006
1007static int /* O - 0 on success, -1 on error */
1008mxml_fd_read(_mxml_fdbuf_t *buf) /* I - File descriptor buffer */
1009{
1010 int bytes; /* Bytes read... */
1011
1012
1013 /*
1014 * Range check input...
1015 */
1016
1017 if (!buf)
1018 return (-1);
1019
1020 /*
1021 * Read from the file descriptor...
1022 */
1023
1024 while ((bytes = read(buf->fd, buf->buffer, sizeof(buf->buffer))) < 0)
1025#ifdef EINTR
1026 if (errno != EAGAIN && errno != EINTR)
1027#else
1028 if (errno != EAGAIN)
1029#endif /* EINTR */
1030 return (-1);
1031
1032 if (bytes == 0)
1033 return (-1);
1034
1035 /*
1036 * Update the pointers and return success...
1037 */
1038
1039 buf->current = buf->buffer;
1040 buf->end = buf->buffer + bytes;
1041
1042 return (0);
1043}
1044
1045
1046/*
1047 * 'mxml_fd_write()' - Write a buffer of data to a file descriptor.
1048 */
1049
1050static int /* O - 0 on success, -1 on error */
1051mxml_fd_write(_mxml_fdbuf_t *buf) /* I - File descriptor buffer */
1052{
1053 int bytes; /* Bytes written */
1054 unsigned char *ptr; /* Pointer into buffer */
1055
1056
1057 /*
1058 * Range check...
1059 */
1060
1061 if (!buf)
1062 return (-1);
1063
1064 /*
1065 * Return 0 if there is nothing to write...
1066 */
1067
1068 if (buf->current == buf->buffer)
1069 return (0);
1070
1071 /*
1072 * Loop until we have written everything...
1073 */
1074
1075 for (ptr = buf->buffer; ptr < buf->current; ptr += bytes)
1076 if ((bytes = write(buf->fd, ptr, buf->current - ptr)) < 0)
1077 return (-1);
1078
1079 /*
1080 * All done, reset pointers and return success...
1081 */
1082
1083 buf->current = buf->buffer;
1084
1085 return (0);
1086}
1087
1088
1089/*
1090 * 'mxml_file_getc()' - Get a character from a file.
1091 */
1092
1093static int /* O - Character or EOF */
1094mxml_file_getc(void *p, /* I - Pointer to file */
1095 int *encoding) /* IO - Encoding */
1096{
1097 int ch, /* Character from file */
1098 temp; /* Temporary character */
1099 FILE *fp; /* Pointer to file */
1100
1101
1102 /*
1103 * Read a character from the file and see if it is EOF or ASCII...
1104 */
1105
1106 fp = (FILE *)p;
1107 ch = getc(fp);
1108
1109 if (ch == EOF)
1110 return (EOF);
1111
1112 switch (*encoding)
1113 {
1114 case ENCODE_UTF8 :
1115 /*
1116 * Got a UTF-8 character; convert UTF-8 to Unicode and return...
1117 */
1118
1119 if (!(ch & 0x80))
1120 {
1121 if (mxml_bad_char(ch))
1122 {
1123 mxml_error("Bad control character 0x%02x not allowed by XML standard!",
1124 ch);
1125 return (EOF);
1126 }
1127
1128#if DEBUG > 1
1129 printf("mxml_file_getc: %c (0x%04x)\n", ch < ' ' ? '.' : ch, ch);
1130#endif /* DEBUG > 1 */
1131
1132 return (ch);
1133 }
1134 else if (ch == 0xfe)
1135 {
1136 /*
1137 * UTF-16 big-endian BOM?
1138 */
1139
1140 ch = getc(fp);
1141 if (ch != 0xff)
1142 return (EOF);
1143
1144 *encoding = ENCODE_UTF16BE;
1145
1146 return (mxml_file_getc(p, encoding));
1147 }
1148 else if (ch == 0xff)
1149 {
1150 /*
1151 * UTF-16 little-endian BOM?
1152 */
1153
1154 ch = getc(fp);
1155 if (ch != 0xfe)
1156 return (EOF);
1157
1158 *encoding = ENCODE_UTF16LE;
1159
1160 return (mxml_file_getc(p, encoding));
1161 }
1162 else if ((ch & 0xe0) == 0xc0)
1163 {
1164 /*
1165 * Two-byte value...
1166 */
1167
1168 if ((temp = getc(fp)) == EOF || (temp & 0xc0) != 0x80)
1169 return (EOF);
1170
1171 ch = ((ch & 0x1f) << 6) | (temp & 0x3f);
1172
1173 if (ch < 0x80)
1174 {
1175 mxml_error("Invalid UTF-8 sequence for character 0x%04x!", ch);
1176 return (EOF);
1177 }
1178 }
1179 else if ((ch & 0xf0) == 0xe0)
1180 {
1181 /*
1182 * Three-byte value...
1183 */
1184
1185 if ((temp = getc(fp)) == EOF || (temp & 0xc0) != 0x80)
1186 return (EOF);
1187
1188 ch = ((ch & 0x0f) << 6) | (temp & 0x3f);
1189
1190 if ((temp = getc(fp)) == EOF || (temp & 0xc0) != 0x80)
1191 return (EOF);
1192
1193 ch = (ch << 6) | (temp & 0x3f);
1194
1195 if (ch < 0x800)
1196 {
1197 mxml_error("Invalid UTF-8 sequence for character 0x%04x!", ch);
1198 return (EOF);
1199 }
1200
1201 /*
1202 * Ignore (strip) Byte Order Mark (BOM)...
1203 */
1204
1205 if (ch == 0xfeff)
1206 return (mxml_file_getc(p, encoding));
1207 }
1208 else if ((ch & 0xf8) == 0xf0)
1209 {
1210 /*
1211 * Four-byte value...
1212 */
1213
1214 if ((temp = getc(fp)) == EOF || (temp & 0xc0) != 0x80)
1215 return (EOF);
1216
1217 ch = ((ch & 0x07) << 6) | (temp & 0x3f);
1218
1219 if ((temp = getc(fp)) == EOF || (temp & 0xc0) != 0x80)
1220 return (EOF);
1221
1222 ch = (ch << 6) | (temp & 0x3f);
1223
1224 if ((temp = getc(fp)) == EOF || (temp & 0xc0) != 0x80)
1225 return (EOF);
1226
1227 ch = (ch << 6) | (temp & 0x3f);
1228
1229 if (ch < 0x10000)
1230 {
1231 mxml_error("Invalid UTF-8 sequence for character 0x%04x!", ch);
1232 return (EOF);
1233 }
1234 }
1235 else
1236 return (EOF);
1237 break;
1238
1239 case ENCODE_UTF16BE :
1240 /*
1241 * Read UTF-16 big-endian char...
1242 */
1243
1244 ch = (ch << 8) | getc(fp);
1245
1246 if (mxml_bad_char(ch))
1247 {
1248 mxml_error("Bad control character 0x%02x not allowed by XML standard!",
1249 ch);
1250 return (EOF);
1251 }
1252 else if (ch >= 0xd800 && ch <= 0xdbff)
1253 {
1254 /*
1255 * Multi-word UTF-16 char...
1256 */
1257
Jon Medhurst96b56152014-10-30 18:01:15 +00001258 int lch = getc(fp);
1259 lch = (lch << 8) | getc(fp);
Jon Medhurstaaf37a32013-06-11 12:10:56 +01001260
1261 if (lch < 0xdc00 || lch >= 0xdfff)
1262 return (EOF);
1263
1264 ch = (((ch & 0x3ff) << 10) | (lch & 0x3ff)) + 0x10000;
1265 }
1266 break;
1267
1268 case ENCODE_UTF16LE :
1269 /*
1270 * Read UTF-16 little-endian char...
1271 */
1272
1273 ch |= (getc(fp) << 8);
1274
1275 if (mxml_bad_char(ch))
1276 {
1277 mxml_error("Bad control character 0x%02x not allowed by XML standard!",
1278 ch);
1279 return (EOF);
1280 }
1281 else if (ch >= 0xd800 && ch <= 0xdbff)
1282 {
1283 /*
1284 * Multi-word UTF-16 char...
1285 */
1286
1287 int lch = getc(fp);
Jon Medhurst96b56152014-10-30 18:01:15 +00001288 lch |= (getc(fp) << 8);
Jon Medhurstaaf37a32013-06-11 12:10:56 +01001289
1290 if (lch < 0xdc00 || lch >= 0xdfff)
1291 return (EOF);
1292
1293 ch = (((ch & 0x3ff) << 10) | (lch & 0x3ff)) + 0x10000;
1294 }
1295 break;
1296 }
1297
1298#if DEBUG > 1
1299 printf("mxml_file_getc: %c (0x%04x)\n", ch < ' ' ? '.' : ch, ch);
1300#endif /* DEBUG > 1 */
1301
1302 return (ch);
1303}
1304
1305
/*
 * 'mxml_file_putc()' - Write a character to a file.
 *
 * "p" is the stdio stream, passed through the generic write-callback
 * interface as an untyped pointer.  An EOF result from putc() is mapped
 * to -1; any other result is reported as success.
 */

static int                              /* O - 0 on success, -1 on failure */
mxml_file_putc(int  ch,                 /* I - Character to write */
               void *p)                 /* I - Pointer to file */
{
  FILE *stream = (FILE *)p;             /* Output stream */


  if (putc(ch, stream) == EOF)
    return (-1);

  return (0);
}
1316
1317
1318/*
1319 * 'mxml_get_entity()' - Get the character corresponding to an entity...
1320 */
1321
1322static int /* O - Character value or EOF on error */
1323mxml_get_entity(mxml_node_t *parent, /* I - Parent node */
1324 void *p, /* I - Pointer to source */
1325 int *encoding, /* IO - Character encoding */
1326 int (*getc_cb)(void *, int *))
1327 /* I - Get character function */
1328{
1329 int ch; /* Current character */
1330 char entity[64], /* Entity string */
1331 *entptr; /* Pointer into entity */
1332
1333
1334 entptr = entity;
1335
1336 while ((ch = (*getc_cb)(p, encoding)) != EOF)
1337 if (ch > 126 || (!isalnum(ch) && ch != '#'))
1338 break;
1339 else if (entptr < (entity + sizeof(entity) - 1))
1340 *entptr++ = ch;
1341 else
1342 {
1343 mxml_error("Entity name too long under parent <%s>!",
1344 parent ? parent->value.element.name : "null");
1345 break;
1346 }
1347
1348 *entptr = '\0';
1349
1350 if (ch != ';')
1351 {
1352 mxml_error("Character entity \"%s\" not terminated under parent <%s>!",
1353 entity, parent ? parent->value.element.name : "null");
1354 return (EOF);
1355 }
1356
1357 if (entity[0] == '#')
1358 {
1359 if (entity[1] == 'x')
1360 ch = strtol(entity + 2, NULL, 16);
1361 else
1362 ch = strtol(entity + 1, NULL, 10);
1363 }
1364 else if ((ch = mxmlEntityGetValue(entity)) < 0)
1365 mxml_error("Entity name \"%s;\" not supported under parent <%s>!",
1366 entity, parent ? parent->value.element.name : "null");
1367
1368 if (mxml_bad_char(ch))
1369 {
1370 mxml_error("Bad control character 0x%02x under parent <%s> not allowed by XML standard!",
1371 ch, parent ? parent->value.element.name : "null");
1372 return (EOF);
1373 }
1374
1375 return (ch);
1376}
1377
1378
1379/*
1380 * 'mxml_load_data()' - Load data into an XML node tree.
1381 */
1382
1383static mxml_node_t * /* O - First node or NULL if the file could not be read. */
1384mxml_load_data(
1385 mxml_node_t *top, /* I - Top node */
1386 void *p, /* I - Pointer to data */
1387 mxml_load_cb_t cb, /* I - Callback function or MXML_NO_CALLBACK */
1388 _mxml_getc_cb_t getc_cb, /* I - Read function */
1389 mxml_sax_cb_t sax_cb, /* I - SAX callback or MXML_NO_CALLBACK */
1390 void *sax_data) /* I - SAX user data */
1391{
1392 mxml_node_t *node, /* Current node */
1393 *first, /* First node added */
1394 *parent; /* Current parent node */
1395 int ch, /* Character from file */
1396 whitespace; /* Non-zero if whitespace seen */
1397 char *buffer, /* String buffer */
1398 *bufptr; /* Pointer into buffer */
1399 int bufsize; /* Size of buffer */
1400 mxml_type_t type; /* Current node type */
1401 int encoding; /* Character encoding */
1402 _mxml_global_t *global = _mxml_global();
1403 /* Global data */
1404 static const char * const types[] = /* Type strings... */
1405 {
1406 "MXML_ELEMENT", /* XML element with attributes */
1407 "MXML_INTEGER", /* Integer value */
1408 "MXML_OPAQUE", /* Opaque string */
1409 "MXML_REAL", /* Real value */
1410 "MXML_TEXT", /* Text fragment */
1411 "MXML_CUSTOM" /* Custom data */
1412 };
1413
1414
1415 /*
1416 * Read elements and other nodes from the file...
1417 */
1418
1419 if ((buffer = malloc(64)) == NULL)
1420 {
1421 mxml_error("Unable to allocate string buffer!");
1422 return (NULL);
1423 }
1424
1425 bufsize = 64;
1426 bufptr = buffer;
1427 parent = top;
1428 first = NULL;
1429 whitespace = 0;
1430 encoding = ENCODE_UTF8;
1431
1432 if (cb && parent)
1433 type = (*cb)(parent);
Jon Medhurst96b56152014-10-30 18:01:15 +00001434 else if (parent)
Jon Medhurstaaf37a32013-06-11 12:10:56 +01001435 type = MXML_TEXT;
Jon Medhurst96b56152014-10-30 18:01:15 +00001436 else
1437 type = MXML_IGNORE;
Jon Medhurstaaf37a32013-06-11 12:10:56 +01001438
1439 while ((ch = (*getc_cb)(p, &encoding)) != EOF)
1440 {
1441 if ((ch == '<' ||
1442 (mxml_isspace(ch) && type != MXML_OPAQUE && type != MXML_CUSTOM)) &&
1443 bufptr > buffer)
1444 {
1445 /*
1446 * Add a new value node...
1447 */
1448
1449 *bufptr = '\0';
1450
1451 switch (type)
1452 {
1453 case MXML_INTEGER :
1454 node = mxmlNewInteger(parent, strtol(buffer, &bufptr, 0));
1455 break;
1456
1457 case MXML_OPAQUE :
1458 node = mxmlNewOpaque(parent, buffer);
1459 break;
1460
1461 case MXML_REAL :
1462 node = mxmlNewReal(parent, strtod(buffer, &bufptr));
1463 break;
1464
1465 case MXML_TEXT :
1466 node = mxmlNewText(parent, whitespace, buffer);
1467 break;
1468
1469 case MXML_CUSTOM :
1470 if (global->custom_load_cb)
1471 {
1472 /*
1473 * Use the callback to fill in the custom data...
1474 */
1475
1476 node = mxmlNewCustom(parent, NULL, NULL);
1477
1478 if ((*global->custom_load_cb)(node, buffer))
1479 {
1480 mxml_error("Bad custom value '%s' in parent <%s>!",
1481 buffer, parent ? parent->value.element.name : "null");
1482 mxmlDelete(node);
1483 node = NULL;
1484 }
1485 break;
1486 }
1487
1488 default : /* Ignore... */
1489 node = NULL;
1490 break;
Jon Medhurst96b56152014-10-30 18:01:15 +00001491 }
Jon Medhurstaaf37a32013-06-11 12:10:56 +01001492
1493 if (*bufptr)
1494 {
1495 /*
1496 * Bad integer/real number value...
1497 */
1498
1499 mxml_error("Bad %s value '%s' in parent <%s>!",
1500 type == MXML_INTEGER ? "integer" : "real", buffer,
1501 parent ? parent->value.element.name : "null");
1502 break;
1503 }
1504
1505 bufptr = buffer;
1506 whitespace = mxml_isspace(ch) && type == MXML_TEXT;
1507
1508 if (!node && type != MXML_IGNORE)
1509 {
1510 /*
1511 * Print error and return...
1512 */
1513
1514 mxml_error("Unable to add value node of type %s to parent <%s>!",
1515 types[type], parent ? parent->value.element.name : "null");
1516 goto error;
1517 }
1518
1519 if (sax_cb)
1520 {
1521 (*sax_cb)(node, MXML_SAX_DATA, sax_data);
1522
1523 if (!mxmlRelease(node))
1524 node = NULL;
1525 }
1526
1527 if (!first && node)
1528 first = node;
1529 }
1530 else if (mxml_isspace(ch) && type == MXML_TEXT)
1531 whitespace = 1;
1532
1533 /*
1534 * Add lone whitespace node if we have an element and existing
1535 * whitespace...
1536 */
1537
1538 if (ch == '<' && whitespace && type == MXML_TEXT)
1539 {
1540 if (parent)
1541 {
1542 node = mxmlNewText(parent, whitespace, "");
1543
1544 if (sax_cb)
1545 {
1546 (*sax_cb)(node, MXML_SAX_DATA, sax_data);
1547
1548 if (!mxmlRelease(node))
1549 node = NULL;
1550 }
1551
1552 if (!first && node)
1553 first = node;
1554 }
1555
1556 whitespace = 0;
1557 }
1558
1559 if (ch == '<')
1560 {
1561 /*
1562 * Start of open/close tag...
1563 */
1564
1565 bufptr = buffer;
1566
1567 while ((ch = (*getc_cb)(p, &encoding)) != EOF)
1568 if (mxml_isspace(ch) || ch == '>' || (ch == '/' && bufptr > buffer))
1569 break;
1570 else if (ch == '<')
1571 {
1572 mxml_error("Bare < in element!");
1573 goto error;
1574 }
1575 else if (ch == '&')
1576 {
1577 if ((ch = mxml_get_entity(parent, p, &encoding, getc_cb)) == EOF)
1578 goto error;
1579
1580 if (mxml_add_char(ch, &bufptr, &buffer, &bufsize))
1581 goto error;
1582 }
1583 else if (mxml_add_char(ch, &bufptr, &buffer, &bufsize))
1584 goto error;
1585 else if (((bufptr - buffer) == 1 && buffer[0] == '?') ||
1586 ((bufptr - buffer) == 3 && !strncmp(buffer, "!--", 3)) ||
1587 ((bufptr - buffer) == 8 && !strncmp(buffer, "![CDATA[", 8)))
1588 break;
1589
1590 *bufptr = '\0';
1591
1592 if (!strcmp(buffer, "!--"))
1593 {
1594 /*
1595 * Gather rest of comment...
1596 */
1597
1598 while ((ch = (*getc_cb)(p, &encoding)) != EOF)
1599 {
1600 if (ch == '>' && bufptr > (buffer + 4) &&
1601 bufptr[-3] != '-' && bufptr[-2] == '-' && bufptr[-1] == '-')
1602 break;
1603 else if (mxml_add_char(ch, &bufptr, &buffer, &bufsize))
1604 goto error;
1605 }
1606
1607 /*
1608 * Error out if we didn't get the whole comment...
1609 */
1610
1611 if (ch != '>')
1612 {
1613 /*
1614 * Print error and return...
1615 */
1616
1617 mxml_error("Early EOF in comment node!");
1618 goto error;
1619 }
1620
1621
1622 /*
1623 * Otherwise add this as an element under the current parent...
1624 */
1625
1626 *bufptr = '\0';
1627
1628 if (!parent && first)
1629 {
1630 /*
1631 * There can only be one root element!
1632 */
1633
Jon Medhurst96b56152014-10-30 18:01:15 +00001634 mxml_error("<%s> cannot be a second root node after <%s>",
Jon Medhurstaaf37a32013-06-11 12:10:56 +01001635 buffer, first->value.element.name);
Jon Medhurst96b56152014-10-30 18:01:15 +00001636 goto error;
Jon Medhurstaaf37a32013-06-11 12:10:56 +01001637 }
1638
1639 if ((node = mxmlNewElement(parent, buffer)) == NULL)
1640 {
1641 /*
1642 * Just print error for now...
1643 */
1644
1645 mxml_error("Unable to add comment node to parent <%s>!",
1646 parent ? parent->value.element.name : "null");
1647 break;
1648 }
1649
1650 if (sax_cb)
1651 {
1652 (*sax_cb)(node, MXML_SAX_COMMENT, sax_data);
1653
1654 if (!mxmlRelease(node))
1655 node = NULL;
1656 }
1657
1658 if (node && !first)
1659 first = node;
1660 }
1661 else if (!strcmp(buffer, "![CDATA["))
1662 {
1663 /*
1664 * Gather CDATA section...
1665 */
1666
1667 while ((ch = (*getc_cb)(p, &encoding)) != EOF)
1668 {
1669 if (ch == '>' && !strncmp(bufptr - 2, "]]", 2))
1670 break;
1671 else if (mxml_add_char(ch, &bufptr, &buffer, &bufsize))
1672 goto error;
1673 }
1674
1675 /*
1676 * Error out if we didn't get the whole comment...
1677 */
1678
1679 if (ch != '>')
1680 {
1681 /*
1682 * Print error and return...
1683 */
1684
1685 mxml_error("Early EOF in CDATA node!");
1686 goto error;
1687 }
1688
1689
1690 /*
1691 * Otherwise add this as an element under the current parent...
1692 */
1693
1694 *bufptr = '\0';
1695
1696 if (!parent && first)
1697 {
1698 /*
1699 * There can only be one root element!
1700 */
1701
Jon Medhurst96b56152014-10-30 18:01:15 +00001702 mxml_error("<%s> cannot be a second root node after <%s>",
Jon Medhurstaaf37a32013-06-11 12:10:56 +01001703 buffer, first->value.element.name);
Jon Medhurst96b56152014-10-30 18:01:15 +00001704 goto error;
Jon Medhurstaaf37a32013-06-11 12:10:56 +01001705 }
1706
1707 if ((node = mxmlNewElement(parent, buffer)) == NULL)
1708 {
1709 /*
1710 * Print error and return...
1711 */
1712
1713 mxml_error("Unable to add CDATA node to parent <%s>!",
1714 parent ? parent->value.element.name : "null");
1715 goto error;
1716 }
1717
1718 if (sax_cb)
1719 {
1720 (*sax_cb)(node, MXML_SAX_CDATA, sax_data);
1721
1722 if (!mxmlRelease(node))
1723 node = NULL;
1724 }
1725
1726 if (node && !first)
1727 first = node;
1728 }
1729 else if (buffer[0] == '?')
1730 {
1731 /*
1732 * Gather rest of processing instruction...
1733 */
1734
1735 while ((ch = (*getc_cb)(p, &encoding)) != EOF)
1736 {
1737 if (ch == '>' && bufptr > buffer && bufptr[-1] == '?')
1738 break;
1739 else if (mxml_add_char(ch, &bufptr, &buffer, &bufsize))
1740 goto error;
1741 }
1742
1743 /*
1744 * Error out if we didn't get the whole processing instruction...
1745 */
1746
1747 if (ch != '>')
1748 {
1749 /*
1750 * Print error and return...
1751 */
1752
1753 mxml_error("Early EOF in processing instruction node!");
1754 goto error;
1755 }
1756
1757 /*
1758 * Otherwise add this as an element under the current parent...
1759 */
1760
1761 *bufptr = '\0';
1762
1763 if (!parent && first)
1764 {
1765 /*
1766 * There can only be one root element!
1767 */
1768
Jon Medhurst96b56152014-10-30 18:01:15 +00001769 mxml_error("<%s> cannot be a second root node after <%s>",
Jon Medhurstaaf37a32013-06-11 12:10:56 +01001770 buffer, first->value.element.name);
Jon Medhurst96b56152014-10-30 18:01:15 +00001771 goto error;
Jon Medhurstaaf37a32013-06-11 12:10:56 +01001772 }
1773
1774 if ((node = mxmlNewElement(parent, buffer)) == NULL)
1775 {
1776 /*
1777 * Print error and return...
1778 */
1779
1780 mxml_error("Unable to add processing instruction node to parent <%s>!",
1781 parent ? parent->value.element.name : "null");
1782 goto error;
1783 }
1784
1785 if (sax_cb)
1786 {
1787 (*sax_cb)(node, MXML_SAX_DIRECTIVE, sax_data);
1788
1789 if (!mxmlRelease(node))
1790 node = NULL;
1791 }
1792
1793 if (node)
1794 {
1795 if (!first)
1796 first = node;
1797
1798 if (!parent)
1799 {
1800 parent = node;
1801
1802 if (cb)
1803 type = (*cb)(parent);
1804 }
1805 }
1806 }
1807 else if (buffer[0] == '!')
1808 {
1809 /*
1810 * Gather rest of declaration...
1811 */
1812
1813 do
1814 {
1815 if (ch == '>')
1816 break;
1817 else
1818 {
1819 if (ch == '&')
1820 if ((ch = mxml_get_entity(parent, p, &encoding, getc_cb)) == EOF)
1821 goto error;
1822
1823 if (mxml_add_char(ch, &bufptr, &buffer, &bufsize))
1824 goto error;
1825 }
1826 }
1827 while ((ch = (*getc_cb)(p, &encoding)) != EOF);
1828
1829 /*
1830 * Error out if we didn't get the whole declaration...
1831 */
1832
1833 if (ch != '>')
1834 {
1835 /*
1836 * Print error and return...
1837 */
1838
1839 mxml_error("Early EOF in declaration node!");
1840 goto error;
1841 }
1842
1843 /*
1844 * Otherwise add this as an element under the current parent...
1845 */
1846
1847 *bufptr = '\0';
1848
1849 if (!parent && first)
1850 {
1851 /*
1852 * There can only be one root element!
1853 */
1854
Jon Medhurst96b56152014-10-30 18:01:15 +00001855 mxml_error("<%s> cannot be a second root node after <%s>",
Jon Medhurstaaf37a32013-06-11 12:10:56 +01001856 buffer, first->value.element.name);
Jon Medhurst96b56152014-10-30 18:01:15 +00001857 goto error;
Jon Medhurstaaf37a32013-06-11 12:10:56 +01001858 }
1859
1860 if ((node = mxmlNewElement(parent, buffer)) == NULL)
1861 {
1862 /*
1863 * Print error and return...
1864 */
1865
1866 mxml_error("Unable to add declaration node to parent <%s>!",
1867 parent ? parent->value.element.name : "null");
1868 goto error;
1869 }
1870
1871 if (sax_cb)
1872 {
1873 (*sax_cb)(node, MXML_SAX_DIRECTIVE, sax_data);
1874
1875 if (!mxmlRelease(node))
1876 node = NULL;
1877 }
1878
1879 if (node)
1880 {
1881 if (!first)
1882 first = node;
1883
1884 if (!parent)
1885 {
1886 parent = node;
1887
1888 if (cb)
1889 type = (*cb)(parent);
1890 }
1891 }
1892 }
1893 else if (buffer[0] == '/')
1894 {
1895 /*
1896 * Handle close tag...
1897 */
1898
1899 if (!parent || strcmp(buffer + 1, parent->value.element.name))
1900 {
1901 /*
1902 * Close tag doesn't match tree; print an error for now...
1903 */
1904
1905 mxml_error("Mismatched close tag <%s> under parent <%s>!",
1906 buffer, parent ? parent->value.element.name : "(null)");
1907 goto error;
1908 }
1909
1910 /*
1911 * Keep reading until we see >...
1912 */
1913
1914 while (ch != '>' && ch != EOF)
1915 ch = (*getc_cb)(p, &encoding);
1916
1917 node = parent;
1918 parent = parent->parent;
1919
1920 if (sax_cb)
1921 {
1922 (*sax_cb)(node, MXML_SAX_ELEMENT_CLOSE, sax_data);
1923
1924 if (!mxmlRelease(node) && first == node)
1925 first = NULL;
1926 }
1927
1928 /*
1929 * Ascend into the parent and set the value type as needed...
1930 */
1931
1932 if (cb && parent)
1933 type = (*cb)(parent);
1934 }
1935 else
1936 {
1937 /*
1938 * Handle open tag...
1939 */
1940
1941 if (!parent && first)
1942 {
1943 /*
1944 * There can only be one root element!
1945 */
1946
Jon Medhurst96b56152014-10-30 18:01:15 +00001947 mxml_error("<%s> cannot be a second root node after <%s>",
Jon Medhurstaaf37a32013-06-11 12:10:56 +01001948 buffer, first->value.element.name);
Jon Medhurst96b56152014-10-30 18:01:15 +00001949 goto error;
Jon Medhurstaaf37a32013-06-11 12:10:56 +01001950 }
1951
1952 if ((node = mxmlNewElement(parent, buffer)) == NULL)
1953 {
1954 /*
1955 * Just print error for now...
1956 */
1957
1958 mxml_error("Unable to add element node to parent <%s>!",
1959 parent ? parent->value.element.name : "null");
1960 goto error;
1961 }
1962
1963 if (mxml_isspace(ch))
1964 {
1965 if ((ch = mxml_parse_element(node, p, &encoding, getc_cb)) == EOF)
1966 goto error;
1967 }
1968 else if (ch == '/')
1969 {
1970 if ((ch = (*getc_cb)(p, &encoding)) != '>')
1971 {
1972 mxml_error("Expected > but got '%c' instead for element <%s/>!",
1973 ch, buffer);
1974 mxmlDelete(node);
1975 goto error;
1976 }
1977
1978 ch = '/';
1979 }
1980
1981 if (sax_cb)
1982 (*sax_cb)(node, MXML_SAX_ELEMENT_OPEN, sax_data);
1983
1984 if (!first)
1985 first = node;
1986
1987 if (ch == EOF)
1988 break;
1989
1990 if (ch != '/')
1991 {
1992 /*
1993 * Descend into this node, setting the value type as needed...
1994 */
1995
1996 parent = node;
1997
1998 if (cb && parent)
1999 type = (*cb)(parent);
2000 }
2001 else if (sax_cb)
2002 {
2003 (*sax_cb)(node, MXML_SAX_ELEMENT_CLOSE, sax_data);
2004
2005 if (!mxmlRelease(node) && first == node)
2006 first = NULL;
2007 }
2008 }
2009
2010 bufptr = buffer;
2011 }
2012 else if (ch == '&')
2013 {
2014 /*
2015 * Add character entity to current buffer...
2016 */
2017
2018 if ((ch = mxml_get_entity(parent, p, &encoding, getc_cb)) == EOF)
2019 goto error;
2020
2021 if (mxml_add_char(ch, &bufptr, &buffer, &bufsize))
2022 goto error;
2023 }
2024 else if (type == MXML_OPAQUE || type == MXML_CUSTOM || !mxml_isspace(ch))
2025 {
2026 /*
2027 * Add character to current buffer...
2028 */
2029
2030 if (mxml_add_char(ch, &bufptr, &buffer, &bufsize))
2031 goto error;
2032 }
2033 }
2034
2035 /*
2036 * Free the string buffer - we don't need it anymore...
2037 */
2038
2039 free(buffer);
2040
2041 /*
2042 * Find the top element and return it...
2043 */
2044
2045 if (parent)
2046 {
2047 node = parent;
2048
Jon Medhurst96b56152014-10-30 18:01:15 +00002049 while (parent != top && parent->parent)
Jon Medhurstaaf37a32013-06-11 12:10:56 +01002050 parent = parent->parent;
2051
2052 if (node != parent)
2053 {
2054 mxml_error("Missing close tag </%s> under parent <%s>!",
2055 node->value.element.name,
2056 node->parent ? node->parent->value.element.name : "(null)");
2057
2058 mxmlDelete(first);
2059
2060 return (NULL);
2061 }
2062 }
2063
2064 if (parent)
2065 return (parent);
2066 else
2067 return (first);
2068
2069 /*
2070 * Common error return...
2071 */
2072
2073error:
2074
2075 mxmlDelete(first);
2076
2077 free(buffer);
2078
2079 return (NULL);
2080}
2081
2082
2083/*
2084 * 'mxml_parse_element()' - Parse an element for any attributes...
2085 */
2086
2087static int /* O - Terminating character */
2088mxml_parse_element(
2089 mxml_node_t *node, /* I - Element node */
2090 void *p, /* I - Data to read from */
2091 int *encoding, /* IO - Encoding */
2092 _mxml_getc_cb_t getc_cb) /* I - Data callback */
2093{
2094 int ch, /* Current character in file */
2095 quote; /* Quoting character */
2096 char *name, /* Attribute name */
2097 *value, /* Attribute value */
2098 *ptr; /* Pointer into name/value */
2099 int namesize, /* Size of name string */
2100 valsize; /* Size of value string */
2101
2102
2103 /*
2104 * Initialize the name and value buffers...
2105 */
2106
2107 if ((name = malloc(64)) == NULL)
2108 {
2109 mxml_error("Unable to allocate memory for name!");
2110 return (EOF);
2111 }
2112
2113 namesize = 64;
2114
2115 if ((value = malloc(64)) == NULL)
2116 {
2117 free(name);
2118 mxml_error("Unable to allocate memory for value!");
2119 return (EOF);
2120 }
2121
2122 valsize = 64;
2123
2124 /*
2125 * Loop until we hit a >, /, ?, or EOF...
2126 */
2127
2128 while ((ch = (*getc_cb)(p, encoding)) != EOF)
2129 {
2130#if DEBUG > 1
2131 fprintf(stderr, "parse_element: ch='%c'\n", ch);
2132#endif /* DEBUG > 1 */
2133
2134 /*
2135 * Skip leading whitespace...
2136 */
2137
2138 if (mxml_isspace(ch))
2139 continue;
2140
2141 /*
2142 * Stop at /, ?, or >...
2143 */
2144
2145 if (ch == '/' || ch == '?')
2146 {
2147 /*
2148 * Grab the > character and print an error if it isn't there...
2149 */
2150
2151 quote = (*getc_cb)(p, encoding);
2152
2153 if (quote != '>')
2154 {
2155 mxml_error("Expected '>' after '%c' for element %s, but got '%c'!",
2156 ch, node->value.element.name, quote);
2157 goto error;
2158 }
2159
2160 break;
2161 }
2162 else if (ch == '<')
2163 {
2164 mxml_error("Bare < in element %s!", node->value.element.name);
2165 goto error;
2166 }
2167 else if (ch == '>')
2168 break;
2169
2170 /*
2171 * Read the attribute name...
2172 */
2173
2174 name[0] = ch;
2175 ptr = name + 1;
2176
2177 if (ch == '\"' || ch == '\'')
2178 {
2179 /*
2180 * Name is in quotes, so get a quoted string...
2181 */
2182
2183 quote = ch;
2184
2185 while ((ch = (*getc_cb)(p, encoding)) != EOF)
2186 {
2187 if (ch == '&')
2188 if ((ch = mxml_get_entity(node, p, encoding, getc_cb)) == EOF)
2189 goto error;
2190
2191 if (mxml_add_char(ch, &ptr, &name, &namesize))
2192 goto error;
2193
2194 if (ch == quote)
2195 break;
2196 }
2197 }
2198 else
2199 {
2200 /*
2201 * Grab an normal, non-quoted name...
2202 */
2203
2204 while ((ch = (*getc_cb)(p, encoding)) != EOF)
2205 if (mxml_isspace(ch) || ch == '=' || ch == '/' || ch == '>' ||
2206 ch == '?')
2207 break;
2208 else
2209 {
2210 if (ch == '&')
2211 if ((ch = mxml_get_entity(node, p, encoding, getc_cb)) == EOF)
2212 goto error;
2213
2214 if (mxml_add_char(ch, &ptr, &name, &namesize))
2215 goto error;
2216 }
2217 }
2218
2219 *ptr = '\0';
2220
2221 if (mxmlElementGetAttr(node, name))
2222 goto error;
2223
2224 while (ch != EOF && mxml_isspace(ch))
2225 ch = (*getc_cb)(p, encoding);
2226
2227 if (ch == '=')
2228 {
2229 /*
2230 * Read the attribute value...
2231 */
2232
2233 while ((ch = (*getc_cb)(p, encoding)) != EOF && mxml_isspace(ch));
2234
2235 if (ch == EOF)
2236 {
2237 mxml_error("Missing value for attribute '%s' in element %s!",
2238 name, node->value.element.name);
2239 goto error;
2240 }
2241
2242 if (ch == '\'' || ch == '\"')
2243 {
2244 /*
2245 * Read quoted value...
2246 */
2247
2248 quote = ch;
2249 ptr = value;
2250
2251 while ((ch = (*getc_cb)(p, encoding)) != EOF)
2252 if (ch == quote)
2253 break;
2254 else
2255 {
2256 if (ch == '&')
2257 if ((ch = mxml_get_entity(node, p, encoding, getc_cb)) == EOF)
2258 goto error;
Jon Medhurst96b56152014-10-30 18:01:15 +00002259
Jon Medhurstaaf37a32013-06-11 12:10:56 +01002260 if (mxml_add_char(ch, &ptr, &value, &valsize))
2261 goto error;
2262 }
2263
2264 *ptr = '\0';
2265 }
2266 else
2267 {
2268 /*
2269 * Read unquoted value...
2270 */
2271
2272 value[0] = ch;
2273 ptr = value + 1;
2274
2275 while ((ch = (*getc_cb)(p, encoding)) != EOF)
2276 if (mxml_isspace(ch) || ch == '=' || ch == '/' || ch == '>')
2277 break;
2278 else
2279 {
2280 if (ch == '&')
2281 if ((ch = mxml_get_entity(node, p, encoding, getc_cb)) == EOF)
2282 goto error;
Jon Medhurst96b56152014-10-30 18:01:15 +00002283
Jon Medhurstaaf37a32013-06-11 12:10:56 +01002284 if (mxml_add_char(ch, &ptr, &value, &valsize))
2285 goto error;
2286 }
2287
2288 *ptr = '\0';
2289 }
2290
2291 /*
2292 * Set the attribute with the given string value...
2293 */
2294
2295 mxmlElementSetAttr(node, name, value);
2296 }
2297 else
2298 {
2299 mxml_error("Missing value for attribute '%s' in element %s!",
2300 name, node->value.element.name);
2301 goto error;
2302 }
2303
2304 /*
2305 * Check the end character...
2306 */
2307
2308 if (ch == '/' || ch == '?')
2309 {
2310 /*
2311 * Grab the > character and print an error if it isn't there...
2312 */
2313
2314 quote = (*getc_cb)(p, encoding);
2315
2316 if (quote != '>')
2317 {
2318 mxml_error("Expected '>' after '%c' for element %s, but got '%c'!",
2319 ch, node->value.element.name, quote);
2320 ch = EOF;
2321 }
2322
2323 break;
2324 }
2325 else if (ch == '>')
2326 break;
2327 }
2328
2329 /*
2330 * Free the name and value buffers and return...
2331 */
2332
2333 free(name);
2334 free(value);
2335
2336 return (ch);
2337
2338 /*
2339 * Common error return point...
2340 */
2341
2342error:
2343
2344 free(name);
2345 free(value);
2346
2347 return (EOF);
2348}
2349
2350
2351/*
2352 * 'mxml_string_getc()' - Get a character from a string.
2353 */
2354
2355static int /* O - Character or EOF */
2356mxml_string_getc(void *p, /* I - Pointer to file */
2357 int *encoding) /* IO - Encoding */
2358{
2359 int ch; /* Character */
2360 const char **s; /* Pointer to string pointer */
2361
2362
2363 s = (const char **)p;
2364
2365 if ((ch = (*s)[0] & 255) != 0 || *encoding == ENCODE_UTF16LE)
2366 {
2367 /*
2368 * Got character; convert UTF-8 to integer and return...
2369 */
2370
2371 (*s)++;
2372
2373 switch (*encoding)
2374 {
2375 case ENCODE_UTF8 :
2376 if (!(ch & 0x80))
2377 {
2378#if DEBUG > 1
2379 printf("mxml_string_getc: %c (0x%04x)\n", ch < ' ' ? '.' : ch, ch);
2380#endif /* DEBUG > 1 */
2381
2382 if (mxml_bad_char(ch))
2383 {
2384 mxml_error("Bad control character 0x%02x not allowed by XML standard!",
2385 ch);
2386 return (EOF);
2387 }
2388
2389 return (ch);
2390 }
2391 else if (ch == 0xfe)
2392 {
2393 /*
2394 * UTF-16 big-endian BOM?
2395 */
2396
2397 if (((*s)[0] & 255) != 0xff)
2398 return (EOF);
2399
2400 *encoding = ENCODE_UTF16BE;
2401 (*s)++;
2402
2403 return (mxml_string_getc(p, encoding));
2404 }
2405 else if (ch == 0xff)
2406 {
2407 /*
2408 * UTF-16 little-endian BOM?
2409 */
2410
2411 if (((*s)[0] & 255) != 0xfe)
2412 return (EOF);
2413
2414 *encoding = ENCODE_UTF16LE;
2415 (*s)++;
2416
2417 return (mxml_string_getc(p, encoding));
2418 }
2419 else if ((ch & 0xe0) == 0xc0)
2420 {
2421 /*
2422 * Two-byte value...
2423 */
2424
2425 if (((*s)[0] & 0xc0) != 0x80)
2426 return (EOF);
2427
2428 ch = ((ch & 0x1f) << 6) | ((*s)[0] & 0x3f);
2429
2430 (*s)++;
2431
2432 if (ch < 0x80)
2433 {
2434 mxml_error("Invalid UTF-8 sequence for character 0x%04x!", ch);
2435 return (EOF);
2436 }
2437
2438#if DEBUG > 1
2439 printf("mxml_string_getc: %c (0x%04x)\n", ch < ' ' ? '.' : ch, ch);
2440#endif /* DEBUG > 1 */
2441
2442 return (ch);
2443 }
2444 else if ((ch & 0xf0) == 0xe0)
2445 {
2446 /*
2447 * Three-byte value...
2448 */
2449
2450 if (((*s)[0] & 0xc0) != 0x80 ||
2451 ((*s)[1] & 0xc0) != 0x80)
2452 return (EOF);
2453
2454 ch = ((((ch & 0x0f) << 6) | ((*s)[0] & 0x3f)) << 6) | ((*s)[1] & 0x3f);
2455
2456 (*s) += 2;
2457
2458 if (ch < 0x800)
2459 {
2460 mxml_error("Invalid UTF-8 sequence for character 0x%04x!", ch);
2461 return (EOF);
2462 }
2463
2464 /*
2465 * Ignore (strip) Byte Order Mark (BOM)...
2466 */
2467
2468 if (ch == 0xfeff)
2469 return (mxml_string_getc(p, encoding));
2470
2471#if DEBUG > 1
2472 printf("mxml_string_getc: %c (0x%04x)\n", ch < ' ' ? '.' : ch, ch);
2473#endif /* DEBUG > 1 */
2474
2475 return (ch);
2476 }
2477 else if ((ch & 0xf8) == 0xf0)
2478 {
2479 /*
2480 * Four-byte value...
2481 */
2482
2483 if (((*s)[0] & 0xc0) != 0x80 ||
2484 ((*s)[1] & 0xc0) != 0x80 ||
2485 ((*s)[2] & 0xc0) != 0x80)
2486 return (EOF);
2487
2488 ch = ((((((ch & 0x07) << 6) | ((*s)[0] & 0x3f)) << 6) |
2489 ((*s)[1] & 0x3f)) << 6) | ((*s)[2] & 0x3f);
2490
2491 (*s) += 3;
2492
2493 if (ch < 0x10000)
2494 {
2495 mxml_error("Invalid UTF-8 sequence for character 0x%04x!", ch);
2496 return (EOF);
2497 }
2498
2499#if DEBUG > 1
2500 printf("mxml_string_getc: %c (0x%04x)\n", ch < ' ' ? '.' : ch, ch);
2501#endif /* DEBUG > 1 */
2502
2503 return (ch);
2504 }
2505 else
2506 return (EOF);
2507
2508 case ENCODE_UTF16BE :
2509 /*
2510 * Read UTF-16 big-endian char...
2511 */
2512
2513 ch = (ch << 8) | ((*s)[0] & 255);
2514 (*s) ++;
2515
2516 if (mxml_bad_char(ch))
2517 {
2518 mxml_error("Bad control character 0x%02x not allowed by XML standard!",
2519 ch);
2520 return (EOF);
2521 }
2522 else if (ch >= 0xd800 && ch <= 0xdbff)
2523 {
2524 /*
2525 * Multi-word UTF-16 char...
2526 */
2527
2528 int lch; /* Lower word */
2529
2530
2531 if (!(*s)[0])
2532 return (EOF);
2533
2534 lch = (((*s)[0] & 255) << 8) | ((*s)[1] & 255);
2535 (*s) += 2;
2536
2537 if (lch < 0xdc00 || lch >= 0xdfff)
2538 return (EOF);
2539
2540 ch = (((ch & 0x3ff) << 10) | (lch & 0x3ff)) + 0x10000;
2541 }
2542
2543#if DEBUG > 1
2544 printf("mxml_string_getc: %c (0x%04x)\n", ch < ' ' ? '.' : ch, ch);
2545#endif /* DEBUG > 1 */
2546
2547 return (ch);
2548
2549 case ENCODE_UTF16LE :
2550 /*
2551 * Read UTF-16 little-endian char...
2552 */
2553
2554 ch = ch | (((*s)[0] & 255) << 8);
2555
2556 if (!ch)
2557 {
2558 (*s) --;
2559 return (EOF);
2560 }
2561
2562 (*s) ++;
2563
2564 if (mxml_bad_char(ch))
2565 {
2566 mxml_error("Bad control character 0x%02x not allowed by XML standard!",
2567 ch);
2568 return (EOF);
2569 }
2570 else if (ch >= 0xd800 && ch <= 0xdbff)
2571 {
2572 /*
2573 * Multi-word UTF-16 char...
2574 */
2575
2576 int lch; /* Lower word */
2577
2578
2579 if (!(*s)[1])
2580 return (EOF);
2581
2582 lch = (((*s)[1] & 255) << 8) | ((*s)[0] & 255);
2583 (*s) += 2;
2584
2585 if (lch < 0xdc00 || lch >= 0xdfff)
2586 return (EOF);
2587
2588 ch = (((ch & 0x3ff) << 10) | (lch & 0x3ff)) + 0x10000;
2589 }
2590
2591#if DEBUG > 1
2592 printf("mxml_string_getc: %c (0x%04x)\n", ch < ' ' ? '.' : ch, ch);
2593#endif /* DEBUG > 1 */
2594
2595 return (ch);
2596 }
2597 }
2598
2599 return (EOF);
2600}
2601
2602
/*
 * 'mxml_string_putc()' - Write a character to a string.
 *
 * "p" is an array of three "char *" slots:
 *
 *   [0] - current write position, advanced on every call
 *   [1] - limit; a byte is stored only while [0] is below it
 *   [2] - overflow flag, NULL until a byte could not be stored
 *
 * Once the flag is set nothing more is stored, but the position keeps
 * advancing.  The function always reports success.
 */

static int                              /* O - 0 on success, -1 on failure */
mxml_string_putc(int  ch,               /* I - Character to write */
                 void *p)               /* I - Pointer to string pointers */
{
  char **state = (char **)p;            /* Position, limit and overflow flag */


  if (state[2] == 0)
  {
    if (state[0] >= state[1])
      state[2] = (char *)1;
    else
      *state[0] = ch;
  }

  state[0] ++;

  return (0);
}
2627
2628
/*
 * 'mxml_write_name()' - Write a name string.
 *
 * A name that starts with ' or " is written as a quoted string: the
 * opening quote, then every character up to the matching quote or the
 * end of the string, then the closing quote.  Inside the quotes, any
 * character for which mxmlEntityGetName() returns a name is written as
 * "&name;".  Any other name is written byte for byte.
 *
 * Writing stops with -1 as soon as "putc_cb" returns a negative value.
 */

static int                              /* O - 0 on success, -1 on failure */
mxml_write_name(const char *s,          /* I - Name to write */
                void       *p,          /* I - Write pointer */
                int        (*putc_cb)(int, void *))
                                        /* I - Write callback */
{
  char          quote;                  /* Quote character */
  const char    *name;                  /* Entity name */


  if (*s != '\"' && *s != '\'')
  {
   /*
    * Plain name - copy it through unchanged...
    */

    for (; *s; s ++)
      if ((*putc_cb)(*s, p) < 0)
        return (-1);

    return (0);
  }

 /*
  * Quoted name - opening quote first...
  */

  quote = *s;

  if ((*putc_cb)(quote, p) < 0)
    return (-1);

  for (s ++; *s && *s != quote; s ++)
  {
    name = mxmlEntityGetName(*s);

    if (name == NULL)
    {
      if ((*putc_cb)(*s, p) < 0)
        return (-1);

      continue;
    }

   /*
    * Character has an entity name - write "&name;"...
    */

    if ((*putc_cb)('&', p) < 0)
      return (-1);

    for (; *name; name ++)
      if ((*putc_cb)(*name, p) < 0)
        return (-1);

    if ((*putc_cb)(';', p) < 0)
      return (-1);
  }

 /*
  * Write the end quote...
  */

  if ((*putc_cb)(quote, p) < 0)
    return (-1);

  return (0);
}
2702
2703
2704/*
2705 * 'mxml_write_node()' - Save an XML node to a file.
2706 */
2707
2708static int /* O - Column or -1 on error */
2709mxml_write_node(mxml_node_t *node, /* I - Node to write */
2710 void *p, /* I - File to write to */
2711 mxml_save_cb_t cb, /* I - Whitespace callback */
2712 int col, /* I - Current column */
2713 _mxml_putc_cb_t putc_cb,/* I - Output callback */
2714 _mxml_global_t *global)/* I - Global data */
2715{
2716 int i, /* Looping var */
2717 width; /* Width of attr + value */
2718 mxml_attr_t *attr; /* Current attribute */
2719 char s[255]; /* Temporary string */
2720
2721
2722 /*
2723 * Print the node value...
2724 */
2725
2726 switch (node->type)
2727 {
2728 case MXML_ELEMENT :
2729 col = mxml_write_ws(node, p, cb, MXML_WS_BEFORE_OPEN, col, putc_cb);
2730
2731 if ((*putc_cb)('<', p) < 0)
2732 return (-1);
2733 if (node->value.element.name[0] == '?' ||
2734 !strncmp(node->value.element.name, "!--", 3) ||
2735 !strncmp(node->value.element.name, "![CDATA[", 8))
2736 {
2737 /*
2738 * Comments, CDATA, and processing instructions do not
2739 * use character entities.
2740 */
2741
2742 const char *ptr; /* Pointer into name */
2743
2744
2745 for (ptr = node->value.element.name; *ptr; ptr ++)
2746 if ((*putc_cb)(*ptr, p) < 0)
2747 return (-1);
2748 }
2749 else if (mxml_write_name(node->value.element.name, p, putc_cb) < 0)
2750 return (-1);
2751
2752 col += strlen(node->value.element.name) + 1;
2753
2754 for (i = node->value.element.num_attrs, attr = node->value.element.attrs;
2755 i > 0;
2756 i --, attr ++)
2757 {
2758 width = strlen(attr->name);
2759
2760 if (attr->value)
2761 width += strlen(attr->value) + 3;
2762
2763 if (global->wrap > 0 && (col + width) > global->wrap)
2764 {
2765 if ((*putc_cb)('\n', p) < 0)
2766 return (-1);
2767
2768 col = 0;
2769 }
2770 else
2771 {
2772 if ((*putc_cb)(' ', p) < 0)
2773 return (-1);
2774
2775 col ++;
2776 }
2777
2778 if (mxml_write_name(attr->name, p, putc_cb) < 0)
2779 return (-1);
2780
2781 if (attr->value)
2782 {
2783 if ((*putc_cb)('=', p) < 0)
2784 return (-1);
2785 if ((*putc_cb)('\"', p) < 0)
2786 return (-1);
2787 if (mxml_write_string(attr->value, p, putc_cb) < 0)
2788 return (-1);
2789 if ((*putc_cb)('\"', p) < 0)
2790 return (-1);
2791 }
2792
2793 col += width;
2794 }
2795
2796 if (node->child)
2797 {
2798 /*
2799 * Write children...
2800 */
2801
2802 mxml_node_t *child; /* Current child */
2803
2804
2805 if ((*putc_cb)('>', p) < 0)
2806 return (-1);
2807 else
2808 col ++;
2809
2810 col = mxml_write_ws(node, p, cb, MXML_WS_AFTER_OPEN, col, putc_cb);
2811
2812 for (child = node->child; child; child = child->next)
2813 {
2814 if ((col = mxml_write_node(child, p, cb, col, putc_cb, global)) < 0)
2815 return (-1);
2816 }
2817
2818 /*
2819 * The ? and ! elements are special-cases and have no end tags...
2820 */
2821
2822 if (node->value.element.name[0] != '!' &&
2823 node->value.element.name[0] != '?')
2824 {
2825 col = mxml_write_ws(node, p, cb, MXML_WS_BEFORE_CLOSE, col, putc_cb);
2826
2827 if ((*putc_cb)('<', p) < 0)
2828 return (-1);
2829 if ((*putc_cb)('/', p) < 0)
2830 return (-1);
2831 if (mxml_write_string(node->value.element.name, p, putc_cb) < 0)
2832 return (-1);
2833 if ((*putc_cb)('>', p) < 0)
2834 return (-1);
2835
2836 col += strlen(node->value.element.name) + 3;
2837
2838 col = mxml_write_ws(node, p, cb, MXML_WS_AFTER_CLOSE, col, putc_cb);
2839 }
2840 }
2841 else if (node->value.element.name[0] == '!' ||
2842 node->value.element.name[0] == '?')
2843 {
2844 /*
2845 * The ? and ! elements are special-cases...
2846 */
2847
2848 if ((*putc_cb)('>', p) < 0)
2849 return (-1);
2850 else
2851 col ++;
2852
2853 col = mxml_write_ws(node, p, cb, MXML_WS_AFTER_OPEN, col, putc_cb);
2854 }
2855 else
2856 {
2857 if ((*putc_cb)(' ', p) < 0)
2858 return (-1);
2859 if ((*putc_cb)('/', p) < 0)
2860 return (-1);
2861 if ((*putc_cb)('>', p) < 0)
2862 return (-1);
2863
2864 col += 3;
2865
2866 col = mxml_write_ws(node, p, cb, MXML_WS_AFTER_OPEN, col, putc_cb);
2867 }
2868 break;
2869
2870 case MXML_INTEGER :
2871 if (node->prev)
2872 {
2873 if (global->wrap > 0 && col > global->wrap)
2874 {
2875 if ((*putc_cb)('\n', p) < 0)
2876 return (-1);
2877
2878 col = 0;
2879 }
2880 else if ((*putc_cb)(' ', p) < 0)
2881 return (-1);
2882 else
2883 col ++;
2884 }
2885
2886 sprintf(s, "%d", node->value.integer);
2887 if (mxml_write_string(s, p, putc_cb) < 0)
2888 return (-1);
2889
2890 col += strlen(s);
2891 break;
2892
2893 case MXML_OPAQUE :
2894 if (mxml_write_string(node->value.opaque, p, putc_cb) < 0)
2895 return (-1);
2896
2897 col += strlen(node->value.opaque);
2898 break;
2899
2900 case MXML_REAL :
2901 if (node->prev)
2902 {
2903 if (global->wrap > 0 && col > global->wrap)
2904 {
2905 if ((*putc_cb)('\n', p) < 0)
2906 return (-1);
2907
2908 col = 0;
2909 }
2910 else if ((*putc_cb)(' ', p) < 0)
2911 return (-1);
2912 else
2913 col ++;
2914 }
2915
2916 sprintf(s, "%f", node->value.real);
2917 if (mxml_write_string(s, p, putc_cb) < 0)
2918 return (-1);
2919
2920 col += strlen(s);
2921 break;
2922
2923 case MXML_TEXT :
2924 if (node->value.text.whitespace && col > 0)
2925 {
2926 if (global->wrap > 0 && col > global->wrap)
2927 {
2928 if ((*putc_cb)('\n', p) < 0)
2929 return (-1);
2930
2931 col = 0;
2932 }
2933 else if ((*putc_cb)(' ', p) < 0)
2934 return (-1);
2935 else
2936 col ++;
2937 }
2938
2939 if (mxml_write_string(node->value.text.string, p, putc_cb) < 0)
2940 return (-1);
2941
2942 col += strlen(node->value.text.string);
2943 break;
2944
2945 case MXML_CUSTOM :
2946 if (global->custom_save_cb)
2947 {
2948 char *data; /* Custom data string */
2949 const char *newline; /* Last newline in string */
2950
2951
2952 if ((data = (*global->custom_save_cb)(node)) == NULL)
2953 return (-1);
2954
2955 if (mxml_write_string(data, p, putc_cb) < 0)
2956 return (-1);
2957
2958 if ((newline = strrchr(data, '\n')) == NULL)
2959 col += strlen(data);
2960 else
2961 col = strlen(newline);
2962
2963 free(data);
2964 break;
2965 }
2966
2967 default : /* Should never happen */
2968 return (-1);
2969 }
2970
2971 return (col);
2972}
2973
2974
2975/*
2976 * 'mxml_write_string()' - Write a string, escaping & and < as needed.
2977 */
2978
2979static int /* O - 0 on success, -1 on failure */
2980mxml_write_string(
2981 const char *s, /* I - String to write */
2982 void *p, /* I - Write pointer */
2983 _mxml_putc_cb_t putc_cb) /* I - Write callback */
2984{
2985 const char *name; /* Entity name, if any */
2986
2987
2988 while (*s)
2989 {
2990 if ((name = mxmlEntityGetName(*s)) != NULL)
2991 {
2992 if ((*putc_cb)('&', p) < 0)
2993 return (-1);
2994
2995 while (*name)
2996 {
2997 if ((*putc_cb)(*name, p) < 0)
2998 return (-1);
2999 name ++;
3000 }
3001
3002 if ((*putc_cb)(';', p) < 0)
3003 return (-1);
3004 }
3005 else if ((*putc_cb)(*s, p) < 0)
3006 return (-1);
3007
3008 s ++;
3009 }
3010
3011 return (0);
3012}
3013
3014
3015/*
3016 * 'mxml_write_ws()' - Do whitespace callback...
3017 */
3018
3019static int /* O - New column */
3020mxml_write_ws(mxml_node_t *node, /* I - Current node */
3021 void *p, /* I - Write pointer */
3022 mxml_save_cb_t cb, /* I - Callback function */
3023 int ws, /* I - Where value */
3024 int col, /* I - Current column */
3025 _mxml_putc_cb_t putc_cb) /* I - Write callback */
3026{
3027 const char *s; /* Whitespace string */
3028
3029
3030 if (cb && (s = (*cb)(node, ws)) != NULL)
3031 {
3032 while (*s)
3033 {
3034 if ((*putc_cb)(*s, p) < 0)
3035 return (-1);
3036 else if (*s == '\n')
3037 col = 0;
3038 else if (*s == '\t')
3039 {
3040 col += MXML_TAB;
3041 col = col - (col % MXML_TAB);
3042 }
3043 else
3044 col ++;
3045
3046 s ++;
3047 }
3048 }
3049
3050 return (col);
3051}
3052
3053
/*
 * End of "$Id: mxml-file.c 455 2014-01-05 03:28:03Z msweet $".
 */