summaryrefslogtreecommitdiff
path: root/src/xine-utils/xmlparser.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/xine-utils/xmlparser.c')
-rw-r--r--src/xine-utils/xmlparser.c419
1 files changed, 355 insertions, 64 deletions
diff --git a/src/xine-utils/xmlparser.c b/src/xine-utils/xmlparser.c
index 1b82074fb..8619b1be8 100644
--- a/src/xine-utils/xmlparser.c
+++ b/src/xine-utils/xmlparser.c
@@ -22,6 +22,12 @@
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
+
+#ifdef XINE_COMPILE
+# include "config.h"
+#endif
+
+#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -40,13 +46,13 @@
*/
#ifdef XINE_COMPILE
-#include "xineutils.h"
+#include <xine/xineutils.h>
#else
#define lprintf(...)
#define XINE_MALLOC
#endif
-#include "xmllexer.h"
-#include "xmlparser.h"
+#include <xine/xmllexer.h>
+#include <xine/xmlparser.h>
#define TOKEN_SIZE 4 * 1024
@@ -80,8 +86,11 @@ static xml_node_t * new_xml_node(void) {
return new_node;
}
+static const char cdata[] = CDATA_MARKER;
+
static void free_xml_node(xml_node_t * node) {
- free (node->name);
+ if (node->name != cdata)
+ free (node->name);
free (node->data);
free(node);
}
@@ -170,23 +179,86 @@ void xml_parser_free_tree(xml_node_t *current_node) {
xml_parser_free_tree_rec(current_node, 1);
}
-#define STATE_IDLE 0
-#define STATE_NODE 1
-#define STATE_COMMENT 7
+typedef enum {
+ /*0*/
+ STATE_IDLE,
+ /* <foo ...> */
+ STATE_NODE,
+ STATE_ATTRIBUTE,
+ STATE_NODE_CLOSE,
+ STATE_TAG_TERM,
+ STATE_ATTRIBUTE_EQUALS,
+ STATE_STRING,
+ STATE_TAG_TERM_IGNORE,
+ /* <?foo ...?> */
+ STATE_Q_NODE,
+ STATE_Q_ATTRIBUTE,
+ STATE_Q_NODE_CLOSE,
+ STATE_Q_TAG_TERM,
+ STATE_Q_ATTRIBUTE_EQUALS,
+ STATE_Q_STRING,
+ /* Others */
+ STATE_COMMENT,
+ STATE_DOCTYPE,
+ STATE_CDATA,
+} parser_state_t;
+
+static xml_node_t *xml_parser_append_text (xml_node_t *node, xml_node_t *subnode, const char *text, int flags)
+{
+ if (!text || !*text)
+ return subnode; /* empty string -> nothing to do */
+
+ if ((flags & XML_PARSER_MULTI_TEXT) && subnode) {
+ /* we have a subtree, so we can't use node->data */
+ if (subnode->name == cdata) {
+ /* most recent node is CDATA - append to it */
+ char *newtext;
+ if (asprintf (&newtext, "%s%s", subnode->data, text) < 0)
+ newtext = NULL;
+ free (subnode->data);
+ subnode->data = newtext;
+ } else {
+ /* most recent node is not CDATA - add a sibling */
+ subnode->next = new_xml_node ();
+ subnode->next->name = cdata;
+ subnode->next->data = strdup (text);
+ subnode = subnode->next;
+ }
+ } else if (node->data) {
+ /* "no" subtree, but we have existing text - append to it */
+ char *newtext;
+ if (asprintf (&newtext, "%s%s", node->data, text) < 0)
+ newtext = NULL;
+ free (node->data);
+ node->data = newtext;
+ } else {
+ /* no text, "no" subtree - duplicate & assign */
+ while (isspace (*text))
+ ++text;
+ if (*text)
+ node->data = strdup (text);
+ }
+
+ return subnode;
+}
-static int xml_parser_get_node (xml_parser_t *xml_parser, xml_node_t *current_node, char *root_name, int rec);
+#define Q_STATE(CURRENT,NEW) (STATE_##NEW + state - STATE_##CURRENT)
-static int _xml_parser_get_node (xml_parser_t *xml_parser, char ** token_buffer, int * token_buffer_size,
+
+static int xml_parser_get_node_internal (xml_parser_t *xml_parser,
+ char ** token_buffer, int * token_buffer_size,
char ** pname_buffer, int * pname_buffer_size,
char ** nname_buffer, int * nname_buffer_size,
- xml_node_t *current_node, char *root_name, int rec) {
+ xml_node_t *current_node, char *root_names[], int rec, int flags)
+{
char *tok = *token_buffer;
char *property_name = *pname_buffer;
char *node_name = *nname_buffer;
- int state = STATE_IDLE;
+ parser_state_t state = STATE_IDLE;
int res = 0;
int parse_res;
int bypass_get_token = 0;
+ int retval = 0; /* used when state==4; non-0 if there are missing </...> */
xml_node_t *subtree = NULL;
xml_node_t *current_subtree = NULL;
xml_property_t *current_property = NULL;
@@ -209,30 +281,33 @@ static int _xml_parser_get_node (xml_parser_t *xml_parser, char ** token_buffer,
/* do nothing */
break;
case (T_EOF):
- return 0; /* normal end */
+ return retval; /* normal end */
break;
case (T_M_START_1):
state = STATE_NODE;
break;
case (T_M_START_2):
- state = 3;
+ state = STATE_NODE_CLOSE;
break;
case (T_C_START):
state = STATE_COMMENT;
break;
case (T_TI_START):
- state = 8;
+ state = STATE_Q_NODE;
break;
case (T_DOCTYPE_START):
- state = 9;
+ state = STATE_DOCTYPE;
+ break;
+ case (T_CDATA_START):
+ state = STATE_CDATA;
break;
case (T_DATA):
/* current data */
- if (current_node->data) {
- /* avoid a memory leak */
- free(current_node->data);
+ {
+ char *decoded = lexer_decode_entities (tok);
+ current_subtree = xml_parser_append_text (current_node, current_subtree, decoded, flags);
+ free (decoded);
}
- current_node->data = lexer_decode_entities(tok);
lprintf("info: node data : %s\n", current_node->data);
break;
default:
@@ -243,6 +318,7 @@ static int _xml_parser_get_node (xml_parser_t *xml_parser, char ** token_buffer,
break;
case STATE_NODE:
+ case STATE_Q_NODE:
switch (res) {
case (T_IDENT):
properties = NULL;
@@ -252,14 +328,19 @@ static int _xml_parser_get_node (xml_parser_t *xml_parser, char ** token_buffer,
if (xml_parser->mode == XML_PARSER_CASE_INSENSITIVE) {
strtoupper(tok);
}
- /* make sure the buffer for the node name is big enough */
- if (*token_buffer_size > *nname_buffer_size) {
- *nname_buffer_size = *token_buffer_size;
- *nname_buffer = realloc (*nname_buffer, *nname_buffer_size);
- node_name = *nname_buffer;
+ if (state == STATE_Q_NODE) {
+ if (asprintf (&node_name, "?%s", tok) < 0)
+ node_name = NULL;
+ free (*nname_buffer);
+ *nname_buffer = node_name;
+ *nname_buffer_size = strlen (node_name) + 1;
+ state = STATE_Q_ATTRIBUTE;
+ } else {
+ free (*nname_buffer);
+ *nname_buffer = node_name = strdup (tok);
+ *nname_buffer_size = strlen (node_name) + 1;
+ state = STATE_ATTRIBUTE;
}
- strcpy(node_name, tok);
- state = 2;
lprintf("info: current node name \"%s\"\n", node_name);
break;
default:
@@ -268,7 +349,8 @@ static int _xml_parser_get_node (xml_parser_t *xml_parser, char ** token_buffer,
break;
}
break;
- case 2:
+
+ case STATE_ATTRIBUTE:
switch (res) {
case (T_EOL):
case (T_SEPAR):
@@ -284,8 +366,13 @@ static int _xml_parser_get_node (xml_parser_t *xml_parser, char ** token_buffer,
/* set node propertys */
subtree->props = properties;
lprintf("info: rec %d new subtree %s\n", rec, node_name);
- parse_res = xml_parser_get_node(xml_parser, subtree, node_name, rec + 1);
- if (parse_res != 0) {
+ root_names[rec + 1] = strdup (node_name);
+ parse_res = xml_parser_get_node_internal (xml_parser, token_buffer, token_buffer_size,
+ pname_buffer, pname_buffer_size,
+ nname_buffer, nname_buffer_size,
+ subtree, root_names, rec + 1, flags);
+ free (root_names[rec + 1]);
+ if (parse_res == -1 || parse_res > 0) {
return parse_res;
}
if (current_subtree == NULL) {
@@ -295,11 +382,16 @@ static int _xml_parser_get_node (xml_parser_t *xml_parser, char ** token_buffer,
current_subtree->next = subtree;
current_subtree = subtree;
}
+ if (parse_res < -1) {
+ /* badly-formed XML (missing close tag) */
+ return parse_res + 1 + (parse_res == -2);
+ }
state = STATE_IDLE;
break;
case (T_M_STOP_2):
/* new leaf */
/* new subtree */
+ new_leaf:
subtree = new_xml_node();
/* set node name */
@@ -321,6 +413,7 @@ static int _xml_parser_get_node (xml_parser_t *xml_parser, char ** token_buffer,
break;
case (T_IDENT):
/* save property name */
+ new_prop:
if (xml_parser->mode == XML_PARSER_CASE_INSENSITIVE) {
strtoupper(tok);
}
@@ -331,7 +424,7 @@ static int _xml_parser_get_node (xml_parser_t *xml_parser, char ** token_buffer,
property_name = *pname_buffer;
}
strcpy(property_name, tok);
- state = 5;
+ state = Q_STATE(ATTRIBUTE, ATTRIBUTE_EQUALS);
lprintf("info: current property name \"%s\"\n", property_name);
break;
default:
@@ -341,17 +434,50 @@ static int _xml_parser_get_node (xml_parser_t *xml_parser, char ** token_buffer,
}
break;
- case 3:
+ case STATE_Q_ATTRIBUTE:
+ switch (res) {
+ case (T_EOL):
+ case (T_SEPAR):
+ /* nothing */
+ break;
+ case (T_TI_STOP):
+ goto new_leaf;
+ case (T_IDENT):
+ goto new_prop;
+ default:
+ lprintf("error: unexpected token \"%s\", state %d\n", tok, state);
+ return -1;
+ break;
+ }
+ break;
+
+ case STATE_NODE_CLOSE:
switch (res) {
case (T_IDENT):
/* must be equal to root_name */
if (xml_parser->mode == XML_PARSER_CASE_INSENSITIVE) {
strtoupper(tok);
}
- if (strcmp(tok, root_name) == 0) {
- state = 4;
- } else {
- lprintf("error: xml struct, tok=%s, waited_tok=%s\n", tok, root_name);
+ if (strcmp(tok, root_names[rec]) == 0) {
+ state = STATE_TAG_TERM;
+ } else if (flags & XML_PARSER_RELAXED) {
+ int r = rec;
+ while (--r >= 0)
+ if (strcmp(tok, root_names[r]) == 0) {
+ lprintf("warning: wanted %s, got %s - assuming missing close tags\n", root_names[rec], tok);
+ retval = r - rec - 1; /* -1 - (no. of implied close tags) */
+ state = STATE_TAG_TERM;
+ break;
+ }
+ /* relaxed parsing, ignoring extra close tag (but we don't handle out-of-order) */
+ if (r < 0) {
+ lprintf("warning: extra close tag %s - ignoring\n", tok);
+ state = STATE_TAG_TERM_IGNORE;
+ }
+ }
+ else
+ {
+ lprintf("error: xml struct, tok=%s, waited_tok=%s\n", tok, root_names[rec]);
return -1;
}
break;
@@ -363,10 +489,10 @@ static int _xml_parser_get_node (xml_parser_t *xml_parser, char ** token_buffer,
break;
/* > expected */
- case 4:
+ case STATE_TAG_TERM:
switch (res) {
case (T_M_STOP_1):
- return 0;
+ return retval;
break;
default:
lprintf("error: unexpected token \"%s\", state %d\n", tok, state);
@@ -376,18 +502,18 @@ static int _xml_parser_get_node (xml_parser_t *xml_parser, char ** token_buffer,
break;
/* = or > or ident or separator expected */
- case 5:
+ case STATE_ATTRIBUTE_EQUALS:
switch (res) {
case (T_EOL):
case (T_SEPAR):
/* do nothing */
break;
case (T_EQUAL):
- state = 6;
+ state = STATE_STRING;
break;
case (T_IDENT):
bypass_get_token = 1; /* jump to state 2 without get a new token */
- state = 2;
+ state = STATE_ATTRIBUTE;
break;
case (T_M_STOP_1):
/* add a new property without value */
@@ -401,7 +527,42 @@ static int _xml_parser_get_node (xml_parser_t *xml_parser, char ** token_buffer,
current_property->name = strdup (property_name);
lprintf("info: new property %s\n", current_property->name);
bypass_get_token = 1; /* jump to state 2 without get a new token */
- state = 2;
+ state = STATE_ATTRIBUTE;
+ break;
+ default:
+ lprintf("error: unexpected token \"%s\", state %d\n", tok, state);
+ return -1;
+ break;
+ }
+ break;
+
+ /* = or ?> or ident or separator expected */
+ case STATE_Q_ATTRIBUTE_EQUALS:
+ switch (res) {
+ case (T_EOL):
+ case (T_SEPAR):
+ /* do nothing */
+ break;
+ case (T_EQUAL):
+ state = STATE_Q_STRING;
+ break;
+ case (T_IDENT):
+ bypass_get_token = 1; /* jump to state 2 without get a new token */
+ state = STATE_Q_ATTRIBUTE;
+ break;
+ case (T_TI_STOP):
+ /* add a new property without value */
+ if (current_property == NULL) {
+ properties = new_xml_property();
+ current_property = properties;
+ } else {
+ current_property->next = new_xml_property();
+ current_property = current_property->next;
+ }
+ current_property->name = strdup (property_name);
+ lprintf("info: new property %s\n", current_property->name);
+ bypass_get_token = 1; /* jump to state 2 without get a new token */
+ state = STATE_Q_ATTRIBUTE;
break;
default:
lprintf("error: unexpected token \"%s\", state %d\n", tok, state);
@@ -411,7 +572,8 @@ static int _xml_parser_get_node (xml_parser_t *xml_parser, char ** token_buffer,
break;
/* string or ident or separator expected */
- case 6:
+ case STATE_STRING:
+ case STATE_Q_STRING:
switch (res) {
case (T_EOL):
case (T_SEPAR):
@@ -430,7 +592,7 @@ static int _xml_parser_get_node (xml_parser_t *xml_parser, char ** token_buffer,
current_property->name = strdup(property_name);
current_property->value = lexer_decode_entities(tok);
lprintf("info: new property %s=%s\n", current_property->name, current_property->value);
- state = 2;
+ state = Q_STATE(STRING, ATTRIBUTE);
break;
default:
lprintf("error: unexpected token \"%s\", state %d\n", tok, state);
@@ -446,31 +608,45 @@ static int _xml_parser_get_node (xml_parser_t *xml_parser, char ** token_buffer,
state = STATE_IDLE;
break;
default:
- state = STATE_COMMENT;
break;
}
break;
- /* ?> expected */
- case 8:
+ /* > expected */
+ case STATE_DOCTYPE:
switch (res) {
- case (T_TI_STOP):
+ case (T_M_STOP_1):
state = 0;
break;
default:
- state = 8;
break;
}
break;
- /* > expected */
- case 9:
+ /* ]]> expected */
+ case STATE_CDATA:
+ switch (res) {
+ case (T_CDATA_STOP):
+ current_subtree = xml_parser_append_text (current_node, current_subtree, tok, flags);
+ lprintf("info: node cdata : %s\n", tok);
+ state = STATE_IDLE;
+ break;
+ default:
+ lprintf("error: unexpected token \"%s\", state %d\n", tok, state);
+ return -1;
+ break;
+ }
+ break;
+
+ /* > expected (following unmatched "</...") */
+ case STATE_TAG_TERM_IGNORE:
switch (res) {
case (T_M_STOP_1):
- state = 0;
+ state = STATE_IDLE;
break;
default:
- state = 9;
+ lprintf("error: unexpected token \"%s\", state %d\n", tok, state);
+ return -1;
break;
}
break;
@@ -491,7 +667,7 @@ static int _xml_parser_get_node (xml_parser_t *xml_parser, char ** token_buffer,
}
}
-static int xml_parser_get_node (xml_parser_t *xml_parser, xml_node_t *current_node, char *root_name, int rec)
+static int xml_parser_get_node (xml_parser_t *xml_parser, xml_node_t *current_node, int flags)
{
int res = 0;
int token_buffer_size = TOKEN_SIZE;
@@ -500,11 +676,14 @@ static int xml_parser_get_node (xml_parser_t *xml_parser, xml_node_t *current_no
char *token_buffer = calloc(1, token_buffer_size);
char *pname_buffer = calloc(1, pname_buffer_size);
char *nname_buffer = calloc(1, nname_buffer_size);
+ char *root_names[MAX_RECURSION + 1];
+ root_names[0] = "";
- res = _xml_parser_get_node(xml_parser, &token_buffer, &token_buffer_size,
+ res = xml_parser_get_node_internal (xml_parser,
+ &token_buffer, &token_buffer_size,
&pname_buffer, &pname_buffer_size,
&nname_buffer, &nname_buffer_size,
- current_node, root_name, rec);
+ current_node, root_names, 0, flags);
free (token_buffer);
free (pname_buffer);
@@ -514,18 +693,48 @@ static int xml_parser_get_node (xml_parser_t *xml_parser, xml_node_t *current_no
}
/* for ABI compatibility */
-int xml_parser_build_tree(xml_node_t **root_node) {
- return xml_parser_build_tree_r(static_xml_parser, root_node);
+int xml_parser_build_tree_with_options(xml_node_t **root_node, int flags) {
+ return xml_parser_build_tree_with_options_r(static_xml_parser, root_node, flags);
}
-int xml_parser_build_tree_r(xml_parser_t *xml_parser, xml_node_t **root_node) {
- xml_node_t *tmp_node;
+int xml_parser_build_tree_with_options_r(xml_parser_t *xml_parser, xml_node_t **root_node, int flags) {
+ xml_node_t *tmp_node, *pri_node, *q_node;
int res;
tmp_node = new_xml_node();
- res = xml_parser_get_node(xml_parser, tmp_node, "", 0);
- if ((tmp_node->child) && (!tmp_node->child->next)) {
- *root_node = tmp_node->child;
+ res = xml_parser_get_node(xml_parser, tmp_node, flags);
+
+ /* delete any top-level [CDATA] nodes */;
+ pri_node = tmp_node->child;
+ q_node = NULL;
+ while (pri_node) {
+ if (pri_node->name == cdata) {
+ xml_node_t *old = pri_node;
+ if (q_node)
+ q_node->next = pri_node->next;
+ else
+ q_node = pri_node;
+ pri_node = pri_node->next;
+ free_xml_node (old);
+ } else {
+ q_node = pri_node;
+ pri_node = pri_node->next;
+ }
+ }
+
+ /* find first non-<?...?> node */;
+ for (pri_node = tmp_node->child, q_node = NULL;
+ pri_node && pri_node->name[0] == '?';
+ pri_node = pri_node->next)
+ q_node = pri_node; /* last <?...?> node (eventually), or NULL */
+
+ if (pri_node && !pri_node->next) {
+ /* move the tail to the head (for compatibility reasons) */
+ if (q_node) {
+ pri_node->next = tmp_node->child;
+ q_node->next = NULL;
+ }
+ *root_node = pri_node;
free_xml_node(tmp_node);
res = 0;
} else {
@@ -536,6 +745,15 @@ int xml_parser_build_tree_r(xml_parser_t *xml_parser, xml_node_t **root_node) {
return res;
}
+/* for ABI compatibility */
+int xml_parser_build_tree(xml_node_t **root_node) {
+ return xml_parser_build_tree_with_options_r (static_xml_parser, root_node, 0);
+}
+
+int xml_parser_build_tree_r(xml_parser_t *xml_parser, xml_node_t **root_node) {
+ return xml_parser_build_tree_with_options_r(xml_parser, root_node, 0);
+}
+
const char *xml_parser_get_property (const xml_node_t *node, const char *name) {
xml_property_t *prop;
@@ -637,7 +855,7 @@ static void xml_parser_dump_node (const xml_node_t *node, int indent) {
free (value);
p = p->next;
if (p) {
- printf ("\n%*s", (int)(indent+2+l), "");
+ printf ("\n%*s", indent+2+l, "");
}
}
printf (">\n");
@@ -654,5 +872,78 @@ static void xml_parser_dump_node (const xml_node_t *node, int indent) {
}
void xml_parser_dump_tree (const xml_node_t *node) {
- xml_parser_dump_node (node, 0);
+ do {
+ xml_parser_dump_node (node, 0);
+ node = node->next;
+ } while (node);
}
+
+#ifdef XINE_XML_PARSER_TEST
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+void *xine_xmalloc (size_t size)
+{
+ return malloc (size);
+}
+
+int main (int argc, char **argv)
+{
+ int i, ret = 0;
+ for (i = 1; argv[i]; ++i)
+ {
+ xml_node_t *tree;
+ int fd;
+ void *buf;
+ struct stat st;
+
+ if (stat (argv[i], &st))
+ {
+ perror (argv[i]);
+ ret = 1;
+ continue;
+ }
+ if (!S_ISREG (st.st_mode))
+ {
+ printf ("%s: not a file\n", argv[i]);
+ ret = 1;
+ continue;
+ }
+ fd = open (argv[i], O_RDONLY);
+ if (!fd)
+ {
+ perror (argv[i]);
+ ret = 1;
+ continue;
+ }
+ buf = mmap (NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
+ if (!buf)
+ {
+ perror (argv[i]);
+ if (close (fd))
+ perror (argv[i]);
+ ret = 1;
+ continue;
+ }
+
+ xml_parser_init (buf, st.st_size, 0);
+ if (!xml_parser_build_tree (&tree))
+ {
+ puts (argv[i]);
+ xml_parser_dump_tree (tree);
+ xml_parser_free_tree (tree);
+ }
+ else
+ printf ("%s: parser failure\n", argv[i]);
+
+ if (close (fd))
+ {
+ perror (argv[i]);
+ ret = 1;
+ }
+ }
+ return ret;
+}
+#endif