summaryrefslogtreecommitdiff
path: root/src/xine-utils/xmlparser.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/xine-utils/xmlparser.c')
-rw-r--r--src/xine-utils/xmlparser.c308
1 files changed, 260 insertions, 48 deletions
diff --git a/src/xine-utils/xmlparser.c b/src/xine-utils/xmlparser.c
index a5d8212d2..a7bc146a9 100644
--- a/src/xine-utils/xmlparser.c
+++ b/src/xine-utils/xmlparser.c
@@ -19,6 +19,10 @@
* Floor, Boston, MA 02110, USA
*/
+#ifdef XINE_COMPILE
+# include "config.h"
+#endif
+
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
@@ -73,8 +77,11 @@ static xml_node_t * new_xml_node(void) {
return new_node;
}
+static const char cdata[] = CDATA_MARKER;
+
static void free_xml_node(xml_node_t * node) {
- free (node->name);
+ if (node->name != cdata)
+ free (node->name);
free (node->data);
free(node);
}
@@ -149,18 +156,79 @@ void xml_parser_free_tree(xml_node_t *current_node) {
xml_parser_free_tree_rec(current_node, 1);
}
-#define STATE_IDLE 0
-#define STATE_NODE 1
-#define STATE_COMMENT 7
+typedef enum {
+ /*0*/
+ STATE_IDLE,
+ /* <foo ...> */
+ STATE_NODE,
+ STATE_ATTRIBUTE,
+ STATE_NODE_CLOSE,
+ STATE_TAG_TERM,
+ STATE_ATTRIBUTE_EQUALS,
+ STATE_STRING,
+ STATE_TAG_TERM_IGNORE,
+ /* <?foo ...?> */
+ STATE_Q_NODE,
+ STATE_Q_ATTRIBUTE,
+ STATE_Q_NODE_CLOSE,
+ STATE_Q_TAG_TERM,
+ STATE_Q_ATTRIBUTE_EQUALS,
+ STATE_Q_STRING,
+ /* Others */
+ STATE_COMMENT,
+ STATE_DOCTYPE,
+ STATE_CDATA,
+} parser_state_t;
+
+static xml_node_t *xml_parser_append_text (xml_node_t *node, xml_node_t *subnode, const char *text, int flags)
+{
+ if (!text || !*text)
+ return subnode; /* empty string -> nothing to do */
+
+ if ((flags & XML_PARSER_MULTI_TEXT) && subnode) {
+ /* we have a subtree, so we can't use node->data */
+ if (subnode->name == cdata) {
+ /* most recent node is CDATA - append to it */
+ char *newtext;
+ asprintf (&newtext, "%s%s", subnode->data, text);
+ free (subnode->data);
+ subnode->data = newtext;
+ } else {
+ /* most recent node is not CDATA - add a sibling */
+ subnode->next = new_xml_node ();
+ subnode->next->name = cdata;
+ subnode->next->data = strdup (text);
+ subnode = subnode->next;
+ }
+ } else if (node->data) {
+ /* "no" subtree, but we have existing text - append to it */
+ char *newtext;
+ asprintf (&newtext, "%s%s", node->data, text);
+ free (node->data);
+ node->data = newtext;
+ } else {
+ /* no text, "no" subtree - duplicate & assign */
+ while (isspace (*text))
+ ++text;
+ if (*text)
+ node->data = strdup (text);
+ }
+
+ return subnode;
+}
+
+#define Q_STATE(CURRENT,NEW) (STATE_##NEW + state - STATE_##CURRENT)
-static int xml_parser_get_node (xml_node_t *current_node, char *root_name, int rec) {
+static int xml_parser_get_node_internal (xml_node_t *current_node, char *root_names[], int rec, int flags)
+{
char tok[TOKEN_SIZE];
char property_name[TOKEN_SIZE];
char node_name[TOKEN_SIZE];
- int state = STATE_IDLE;
+ parser_state_t state = STATE_IDLE;
int res = 0;
int parse_res;
int bypass_get_token = 0;
+ int retval = 0; /* used when state==4; non-0 if there are missing </...> */
xml_node_t *subtree = NULL;
xml_node_t *current_subtree = NULL;
xml_property_t *current_property = NULL;
@@ -182,30 +250,33 @@ static int xml_parser_get_node (xml_node_t *current_node, char *root_name, int r
/* do nothing */
break;
case (T_EOF):
- return 0; /* normal end */
+ return retval; /* normal end */
break;
case (T_M_START_1):
state = STATE_NODE;
break;
case (T_M_START_2):
- state = 3;
+ state = STATE_NODE_CLOSE;
break;
case (T_C_START):
state = STATE_COMMENT;
break;
case (T_TI_START):
- state = 8;
+ state = STATE_Q_NODE;
break;
case (T_DOCTYPE_START):
- state = 9;
+ state = STATE_DOCTYPE;
+ break;
+ case (T_CDATA_START):
+ state = STATE_CDATA;
break;
case (T_DATA):
/* current data */
- if (current_node->data) {
- /* avoid a memory leak */
- free(current_node->data);
+ {
+ char *decoded = lexer_decode_entities (tok);
+ current_subtree = xml_parser_append_text (current_node, current_subtree, decoded, flags);
+ free (decoded);
}
- current_node->data = lexer_decode_entities(tok);
lprintf("info: node data : %s\n", current_node->data);
break;
default:
@@ -216,6 +287,7 @@ static int xml_parser_get_node (xml_node_t *current_node, char *root_name, int r
break;
case STATE_NODE:
+ case STATE_Q_NODE:
switch (res) {
case (T_IDENT):
properties = NULL;
@@ -225,8 +297,13 @@ static int xml_parser_get_node (xml_node_t *current_node, char *root_name, int r
if (xml_parser_mode == XML_PARSER_CASE_INSENSITIVE) {
strtoupper(tok);
}
- strcpy(node_name, tok);
- state = 2;
+ if (state == STATE_Q_NODE) {
+ snprintf (node_name, TOKEN_SIZE, "?%s", tok);
+ state = STATE_Q_ATTRIBUTE;
+ } else {
+ strcpy(node_name, tok);
+ state = STATE_ATTRIBUTE;
+ }
lprintf("info: current node name \"%s\"\n", node_name);
break;
default:
@@ -235,7 +312,8 @@ static int xml_parser_get_node (xml_node_t *current_node, char *root_name, int r
break;
}
break;
- case 2:
+
+ case STATE_ATTRIBUTE:
switch (res) {
case (T_EOL):
case (T_SEPAR):
@@ -251,8 +329,9 @@ static int xml_parser_get_node (xml_node_t *current_node, char *root_name, int r
/* set node propertys */
subtree->props = properties;
lprintf("info: rec %d new subtree %s\n", rec, node_name);
- parse_res = xml_parser_get_node(subtree, node_name, rec + 1);
- if (parse_res != 0) {
+ root_names[rec + 1] = node_name;
+ parse_res = xml_parser_get_node_internal(subtree, root_names, rec + 1, flags);
+ if (parse_res == -1 || parse_res > 0) {
return parse_res;
}
if (current_subtree == NULL) {
@@ -262,11 +341,16 @@ static int xml_parser_get_node (xml_node_t *current_node, char *root_name, int r
current_subtree->next = subtree;
current_subtree = subtree;
}
+ if (parse_res < -1) {
+ /* badly-formed XML (missing close tag) */
+ return parse_res + 1 + (parse_res == -2);
+ }
state = STATE_IDLE;
break;
case (T_M_STOP_2):
/* new leaf */
/* new subtree */
+ new_leaf:
subtree = new_xml_node();
/* set node name */
@@ -288,11 +372,12 @@ static int xml_parser_get_node (xml_node_t *current_node, char *root_name, int r
break;
case (T_IDENT):
/* save property name */
+ new_prop:
if (xml_parser_mode == XML_PARSER_CASE_INSENSITIVE) {
strtoupper(tok);
}
strcpy(property_name, tok);
- state = 5;
+ state = Q_STATE(ATTRIBUTE, ATTRIBUTE_EQUALS);
lprintf("info: current property name \"%s\"\n", property_name);
break;
default:
@@ -302,17 +387,50 @@ static int xml_parser_get_node (xml_node_t *current_node, char *root_name, int r
}
break;
- case 3:
+ case STATE_Q_ATTRIBUTE:
+ switch (res) {
+ case (T_EOL):
+ case (T_SEPAR):
+ /* nothing */
+ break;
+ case (T_TI_STOP):
+ goto new_leaf;
+ case (T_IDENT):
+ goto new_prop;
+ default:
+ lprintf("error: unexpected token \"%s\", state %d\n", tok, state);
+ return -1;
+ break;
+ }
+ break;
+
+ case STATE_NODE_CLOSE:
switch (res) {
case (T_IDENT):
/* must be equal to root_name */
if (xml_parser_mode == XML_PARSER_CASE_INSENSITIVE) {
strtoupper(tok);
}
- if (strcmp(tok, root_name) == 0) {
- state = 4;
- } else {
- lprintf("error: xml struct, tok=%s, waited_tok=%s\n", tok, root_name);
+ if (strcmp(tok, root_names[rec]) == 0) {
+ state = STATE_TAG_TERM;
+ } else if (flags & XML_PARSER_RELAXED) {
+ int r = rec;
+ while (--r >= 0)
+ if (strcmp(tok, root_names[r]) == 0) {
+ lprintf("warning: wanted %s, got %s - assuming missing close tags\n", root_names[rec], tok);
+ retval = r - rec - 1; /* -1 - (no. of implied close tags) */
+ state = STATE_TAG_TERM;
+ break;
+ }
+ /* relaxed parsing, ignoring extra close tag (but we don't handle out-of-order) */
+ if (r < 0) {
+ lprintf("warning: extra close tag %s - ignoring\n", tok);
+ state = STATE_TAG_TERM_IGNORE;
+ }
+ }
+ else
+ {
+ lprintf("error: xml struct, tok=%s, waited_tok=%s\n", tok, root_names[rec]);
return -1;
}
break;
@@ -324,10 +442,10 @@ static int xml_parser_get_node (xml_node_t *current_node, char *root_name, int r
break;
/* > expected */
- case 4:
+ case STATE_TAG_TERM:
switch (res) {
case (T_M_STOP_1):
- return 0;
+ return retval;
break;
default:
lprintf("error: unexpected token \"%s\", state %d\n", tok, state);
@@ -337,18 +455,18 @@ static int xml_parser_get_node (xml_node_t *current_node, char *root_name, int r
break;
/* = or > or ident or separator expected */
- case 5:
+ case STATE_ATTRIBUTE_EQUALS:
switch (res) {
case (T_EOL):
case (T_SEPAR):
/* do nothing */
break;
case (T_EQUAL):
- state = 6;
+ state = STATE_STRING;
break;
case (T_IDENT):
bypass_get_token = 1; /* jump to state 2 without get a new token */
- state = 2;
+ state = STATE_ATTRIBUTE;
break;
case (T_M_STOP_1):
/* add a new property without value */
@@ -362,7 +480,42 @@ static int xml_parser_get_node (xml_node_t *current_node, char *root_name, int r
current_property->name = strdup (property_name);
lprintf("info: new property %s\n", current_property->name);
bypass_get_token = 1; /* jump to state 2 without get a new token */
- state = 2;
+ state = STATE_ATTRIBUTE;
+ break;
+ default:
+ lprintf("error: unexpected token \"%s\", state %d\n", tok, state);
+ return -1;
+ break;
+ }
+ break;
+
+ /* = or ?> or ident or separator expected */
+ case STATE_Q_ATTRIBUTE_EQUALS:
+ switch (res) {
+ case (T_EOL):
+ case (T_SEPAR):
+ /* do nothing */
+ break;
+ case (T_EQUAL):
+ state = STATE_Q_STRING;
+ break;
+ case (T_IDENT):
+ bypass_get_token = 1; /* jump to state 2 without get a new token */
+ state = STATE_Q_ATTRIBUTE;
+ break;
+ case (T_TI_STOP):
+ /* add a new property without value */
+ if (current_property == NULL) {
+ properties = new_xml_property();
+ current_property = properties;
+ } else {
+ current_property->next = new_xml_property();
+ current_property = current_property->next;
+ }
+ current_property->name = strdup (property_name);
+ lprintf("info: new property %s\n", current_property->name);
+ bypass_get_token = 1; /* jump to state 2 without get a new token */
+ state = STATE_Q_ATTRIBUTE;
break;
default:
lprintf("error: unexpected token \"%s\", state %d\n", tok, state);
@@ -372,7 +525,8 @@ static int xml_parser_get_node (xml_node_t *current_node, char *root_name, int r
break;
/* string or ident or separator expected */
- case 6:
+ case STATE_STRING:
+ case STATE_Q_STRING:
switch (res) {
case (T_EOL):
case (T_SEPAR):
@@ -391,7 +545,7 @@ static int xml_parser_get_node (xml_node_t *current_node, char *root_name, int r
current_property->name = strdup(property_name);
current_property->value = lexer_decode_entities(tok);
lprintf("info: new property %s=%s\n", current_property->name, current_property->value);
- state = 2;
+ state = Q_STATE(STRING, ATTRIBUTE);
break;
default:
lprintf("error: unexpected token \"%s\", state %d\n", tok, state);
@@ -407,31 +561,45 @@ static int xml_parser_get_node (xml_node_t *current_node, char *root_name, int r
state = STATE_IDLE;
break;
default:
- state = STATE_COMMENT;
break;
}
break;
- /* ?> expected */
- case 8:
+ /* > expected */
+ case STATE_DOCTYPE:
switch (res) {
- case (T_TI_STOP):
+ case (T_M_STOP_1):
state = 0;
break;
default:
- state = 8;
break;
}
break;
- /* > expected */
- case 9:
+ /* ]]> expected */
+ case STATE_CDATA:
+ switch (res) {
+ case (T_CDATA_STOP):
+ current_subtree = xml_parser_append_text (current_node, current_subtree, tok, flags);
+ lprintf("info: node cdata : %s\n", tok);
+ state = STATE_IDLE;
+ break;
+ default:
+ lprintf("error: unexpected token \"%s\", state %d\n", tok, state);
+ return -1;
+ break;
+ }
+ break;
+
+ /* > expected (following unmatched "</...") */
+ case STATE_TAG_TERM_IGNORE:
switch (res) {
case (T_M_STOP_1):
- state = 0;
+ state = STATE_IDLE;
break;
default:
- state = 9;
+ lprintf("error: unexpected token \"%s\", state %d\n", tok, state);
+ return -1;
break;
}
break;
@@ -452,14 +620,51 @@ static int xml_parser_get_node (xml_node_t *current_node, char *root_name, int r
}
}
-int xml_parser_build_tree(xml_node_t **root_node) {
- xml_node_t *tmp_node;
+static int xml_parser_get_node (xml_node_t *current_node, int flags)
+{
+ char *root_names[MAX_RECURSION + 1];
+ root_names[0] = "";
+ return xml_parser_get_node_internal (current_node, root_names, 0, flags);
+}
+
+int xml_parser_build_tree_with_options(xml_node_t **root_node, int flags) {
+ xml_node_t *tmp_node, *pri_node, *q_node;
int res;
tmp_node = new_xml_node();
- res = xml_parser_get_node(tmp_node, "", 0);
- if ((tmp_node->child) && (!tmp_node->child->next)) {
- *root_node = tmp_node->child;
+ res = xml_parser_get_node(tmp_node, flags);
+
+ /* delete any top-level [CDATA] nodes */;
+ pri_node = tmp_node->child;
+ q_node = NULL;
+ while (pri_node) {
+ if (pri_node->name == cdata) {
+ xml_node_t *old = pri_node;
+ if (q_node)
+ q_node->next = pri_node->next;
+ else
+ q_node = pri_node;
+ pri_node = pri_node->next;
+ free_xml_node (old);
+ } else {
+ q_node = pri_node;
+ pri_node = pri_node->next;
+ }
+ }
+
+ /* find first non-<?...?> node */;
+ for (pri_node = tmp_node->child, q_node = NULL;
+ pri_node && pri_node->name[0] == '?';
+ pri_node = pri_node->next)
+ q_node = pri_node; /* last <?...?> node (eventually), or NULL */
+
+ if (pri_node && !pri_node->next) {
+ /* move the tail to the head (for compatibility reasons) */
+ if (q_node) {
+ pri_node->next = tmp_node->child;
+ q_node->next = NULL;
+ }
+ *root_node = pri_node;
free_xml_node(tmp_node);
res = 0;
} else {
@@ -470,6 +675,10 @@ int xml_parser_build_tree(xml_node_t **root_node) {
return res;
}
+int xml_parser_build_tree(xml_node_t **root_node) {
+ return xml_parser_build_tree_with_options (root_node, 0);
+}
+
const char *xml_parser_get_property (const xml_node_t *node, const char *name) {
xml_property_t *prop;
@@ -588,5 +797,8 @@ static void xml_parser_dump_node (const xml_node_t *node, int indent) {
}
void xml_parser_dump_tree (const xml_node_t *node) {
- xml_parser_dump_node (node, 0);
+ do {
+ xml_parser_dump_node (node, 0);
+ node = node->next;
+ } while (node);
}