From a7cb332c30b51b4e27f6e57c6b58dc2d1f73f89c Mon Sep 17 00:00:00 2001 From: Darren Salt Date: Thu, 19 Apr 2007 17:24:08 +0100 Subject: Relaxed XML parsing (ABI addition). Copes with missing close tags. Copes with extra close tags iff they don't match so-far-unclosed elements. Doesn't cope with missing "/>" - that needs more info from the caller. --- src/xine-utils/xmlparser.c | 65 +++++++++++++++++++++++++++++++++++++++------- 1 file changed, 56 insertions(+), 9 deletions(-) (limited to 'src/xine-utils/xmlparser.c') diff --git a/src/xine-utils/xmlparser.c b/src/xine-utils/xmlparser.c index 47096705a..82e1f0ecd 100644 --- a/src/xine-utils/xmlparser.c +++ b/src/xine-utils/xmlparser.c @@ -156,7 +156,8 @@ void xml_parser_free_tree(xml_node_t *current_node) { #define STATE_NODE 1 #define STATE_COMMENT 7 -static int xml_parser_get_node (xml_node_t *current_node, char *root_name, int rec) { +static int xml_parser_get_node_internal (xml_node_t *current_node, char *root_names[], int rec, int relaxed) +{ char tok[TOKEN_SIZE]; char property_name[TOKEN_SIZE]; char node_name[TOKEN_SIZE]; @@ -164,6 +165,7 @@ static int xml_parser_get_node (xml_node_t *current_node, char *root_name, int r int res = 0; int parse_res; int bypass_get_token = 0; + int retval = 0; /* used when state==4; non-0 if there are missing */ xml_node_t *subtree = NULL; xml_node_t *current_subtree = NULL; xml_property_t *current_property = NULL; @@ -183,7 +185,7 @@ static int xml_parser_get_node (xml_node_t *current_node, char *root_name, int r /* do nothing */ break; case (T_EOF): - return 0; /* normal end */ + return retval; /* normal end */ break; case (T_M_START_1): state = STATE_NODE; @@ -252,7 +254,12 @@ static int xml_parser_get_node (xml_node_t *current_node, char *root_name, int r /* set node propertys */ subtree->props = properties; lprintf("info: rec %d new subtree %s\n", rec, node_name); - parse_res = xml_parser_get_node(subtree, node_name, rec + 1); + root_names[rec + 1] = node_name; + parse_res = 
xml_parser_get_node_internal(subtree, root_names, rec + 1, relaxed); + if (parse_res < -1) { + /* badly-formed XML (missing close tag) */ + return parse_res + 1 + (parse_res == -2); + } if (parse_res != 0) { return parse_res; } @@ -310,10 +317,26 @@ static int xml_parser_get_node (xml_node_t *current_node, char *root_name, int r if (xml_parser_mode == XML_PARSER_CASE_INSENSITIVE) { strtoupper(tok); } - if (strcmp(tok, root_name) == 0) { + if (strcmp(tok, root_names[rec]) == 0) { state = 4; - } else { - lprintf("error: xml struct, tok=%s, waited_tok=%s\n", tok, root_name); + } else if (relaxed) { + int r = rec; + while (--r >= 0) + if (strcmp(tok, root_names[r]) == 0) { + lprintf("warning: wanted %s, got %s - assuming missing close tags\n", root_names[rec], tok); + retval = r - rec - 1; /* -1 - (no. of implied close tags) */ + state = 4; + break; + } + /* relaxed parsing, ignoring extra close tag (but we don't handle out-of-order) */ + if (r < 0) { + lprintf("warning: extra close tag %s - ignoring\n", tok); + state = 10; + } + } + else + { + lprintf("error: xml struct, tok=%s, waited_tok=%s\n", tok, root_names[rec]); return -1; } break; @@ -328,7 +351,7 @@ static int xml_parser_get_node (xml_node_t *current_node, char *root_name, int r case 4: switch (res) { case (T_M_STOP_1): - return 0; + return retval; break; default: lprintf("error: unexpected token \"%s\", state %d\n", tok, state); @@ -435,6 +458,19 @@ static int xml_parser_get_node (xml_node_t *current_node, char *root_name, int r state = 9; break; } + break; + + /* > expected (following unmatched "child) && (!tmp_node->child->next)) { *root_node = tmp_node->child; free_xml_node(tmp_node); @@ -471,6 +514,10 @@ int xml_parser_build_tree(xml_node_t **root_node) { return res; } +int xml_parser_build_tree(xml_node_t **root_node) { + return xml_parser_build_tree_relaxed (root_node, 0); +} + const char *xml_parser_get_property (const xml_node_t *node, const char *name) { xml_property_t *prop; -- cgit v1.2.3 From 
5fb4b3b17479cba362568cc5ec99442f92f3d819 Mon Sep 17 00:00:00 2001 From: Darren Salt Date: Sat, 21 Apr 2007 23:10:53 +0100 Subject: Use an enumeration for XML parser state tracking. --- src/xine-utils/xmlparser.c | 58 ++++++++++++++++++++++++++++------------------ 1 file changed, 36 insertions(+), 22 deletions(-) (limited to 'src/xine-utils/xmlparser.c') diff --git a/src/xine-utils/xmlparser.c b/src/xine-utils/xmlparser.c index 363c6381f..f48fe696c 100644 --- a/src/xine-utils/xmlparser.c +++ b/src/xine-utils/xmlparser.c @@ -152,15 +152,30 @@ void xml_parser_free_tree(xml_node_t *current_node) { xml_parser_free_tree_rec(current_node, 1); } -#define STATE_IDLE 0 -#define STATE_NODE 1 -#define STATE_COMMENT 7 +typedef enum { + /*0*/ + STATE_IDLE, + /* */ + STATE_NODE, + STATE_ATTRIBUTE, + STATE_NODE_CLOSE, + STATE_TAG_TERM, + STATE_ATTRIBUTE_EQUALS, + STATE_STRING, + /* */ + STATE_Q_NODE, + /* Others */ + STATE_COMMENT, + STATE_DOCTYPE, +} parser_state_t; + +#define Q_STATE(CURRENT,NEW) (STATE_##NEW + state - STATE_##CURRENT) static int xml_parser_get_node (xml_node_t *current_node, char *root_name, int rec) { char tok[TOKEN_SIZE]; char property_name[TOKEN_SIZE]; char node_name[TOKEN_SIZE]; - int state = STATE_IDLE; + parser_state_t state = STATE_IDLE; int res = 0; int parse_res; int bypass_get_token = 0; @@ -189,16 +204,16 @@ static int xml_parser_get_node (xml_node_t *current_node, char *root_name, int r state = STATE_NODE; break; case (T_M_START_2): - state = 3; + state = STATE_NODE_CLOSE; break; case (T_C_START): state = STATE_COMMENT; break; case (T_TI_START): - state = 8; + state = STATE_Q_NODE; break; case (T_DOCTYPE_START): - state = 9; + state = STATE_DOCTYPE; break; case (T_DATA): /* current data */ @@ -236,7 +251,8 @@ static int xml_parser_get_node (xml_node_t *current_node, char *root_name, int r break; } break; - case 2: + + case STATE_ATTRIBUTE: switch (res) { case (T_EOL): case (T_SEPAR): @@ -293,7 +309,7 @@ static int xml_parser_get_node (xml_node_t 
*current_node, char *root_name, int r strtoupper(tok); } strcpy(property_name, tok); - state = 5; + state = STATE_ATTRIBUTE_EQUALS; lprintf("info: current property name \"%s\"\n", property_name); break; default: @@ -303,7 +319,7 @@ static int xml_parser_get_node (xml_node_t *current_node, char *root_name, int r } break; - case 3: + case STATE_NODE_CLOSE: switch (res) { case (T_IDENT): /* must be equal to root_name */ @@ -311,7 +327,7 @@ static int xml_parser_get_node (xml_node_t *current_node, char *root_name, int r strtoupper(tok); } if (strcmp(tok, root_name) == 0) { - state = 4; + state = STATE_TAG_TERM; } else { lprintf("error: xml struct, tok=%s, waited_tok=%s\n", tok, root_name); return -1; @@ -325,7 +341,7 @@ static int xml_parser_get_node (xml_node_t *current_node, char *root_name, int r break; /* > expected */ - case 4: + case STATE_TAG_TERM: switch (res) { case (T_M_STOP_1): return 0; @@ -338,18 +354,18 @@ static int xml_parser_get_node (xml_node_t *current_node, char *root_name, int r break; /* = or > or ident or separator expected */ - case 5: + case STATE_ATTRIBUTE_EQUALS: switch (res) { case (T_EOL): case (T_SEPAR): /* do nothing */ break; case (T_EQUAL): - state = 6; + state = STATE_STRING; break; case (T_IDENT): bypass_get_token = 1; /* jump to state 2 without get a new token */ - state = 2; + state = STATE_ATTRIBUTE; break; case (T_M_STOP_1): /* add a new property without value */ @@ -363,7 +379,7 @@ static int xml_parser_get_node (xml_node_t *current_node, char *root_name, int r current_property->name = strdup (property_name); lprintf("info: new property %s\n", current_property->name); bypass_get_token = 1; /* jump to state 2 without get a new token */ - state = 2; + state = STATE_ATTRIBUTE; break; default: lprintf("error: unexpected token \"%s\", state %d\n", tok, state); @@ -373,7 +389,7 @@ static int xml_parser_get_node (xml_node_t *current_node, char *root_name, int r break; /* string or ident or separator expected */ - case 6: + case 
STATE_STRING: switch (res) { case (T_EOL): case (T_SEPAR): @@ -392,7 +408,7 @@ static int xml_parser_get_node (xml_node_t *current_node, char *root_name, int r current_property->name = strdup(property_name); current_property->value = lexer_decode_entities(tok); lprintf("info: new property %s=%s\n", current_property->name, current_property->value); - state = 2; + state = STATE_ATTRIBUTE; break; default: lprintf("error: unexpected token \"%s\", state %d\n", tok, state); @@ -414,25 +430,23 @@ static int xml_parser_get_node (xml_node_t *current_node, char *root_name, int r break; /* ?> expected */ - case 8: + case STATE_Q_NODE: switch (res) { case (T_TI_STOP): state = 0; break; default: - state = 8; break; } break; /* > expected */ - case 9: + case STATE_DOCTYPE: switch (res) { case (T_M_STOP_1): state = 0; break; default: - state = 9; break; } break; -- cgit v1.2.3 From 497d74fe5b6a68b904a6944a9c60dd3c631654a6 Mon Sep 17 00:00:00 2001 From: Darren Salt Date: Sat, 21 Apr 2007 23:36:18 +0100 Subject: Stop ignoring <?foo?> elements, and parse them for attributes. In the XML data structure returned by xml_parser_build_tree(), the primary content is the directly-returned node, and the extra <?foo?> elements follow this (use ->next) though they appear first in the XML text, thus maintaining backward compatibility.
--- src/xine-utils/xmlparser.c | 109 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 89 insertions(+), 20 deletions(-) (limited to 'src/xine-utils/xmlparser.c') diff --git a/src/xine-utils/xmlparser.c b/src/xine-utils/xmlparser.c index f48fe696c..c8723a12e 100644 --- a/src/xine-utils/xmlparser.c +++ b/src/xine-utils/xmlparser.c @@ -164,6 +164,11 @@ typedef enum { STATE_STRING, /* */ STATE_Q_NODE, + STATE_Q_ATTRIBUTE, + STATE_Q_NODE_CLOSE, + STATE_Q_TAG_TERM, + STATE_Q_ATTRIBUTE_EQUALS, + STATE_Q_STRING, /* Others */ STATE_COMMENT, STATE_DOCTYPE, @@ -232,6 +237,7 @@ static int xml_parser_get_node (xml_node_t *current_node, char *root_name, int r break; case STATE_NODE: + case STATE_Q_NODE: switch (res) { case (T_IDENT): properties = NULL; @@ -241,8 +247,13 @@ static int xml_parser_get_node (xml_node_t *current_node, char *root_name, int r if (xml_parser_mode == XML_PARSER_CASE_INSENSITIVE) { strtoupper(tok); } - strcpy(node_name, tok); - state = 2; + if (state == STATE_Q_NODE) { + snprintf (node_name, TOKEN_SIZE, "?%s", tok); + state = STATE_Q_ATTRIBUTE; + } else { + strcpy(node_name, tok); + state = STATE_ATTRIBUTE; + } lprintf("info: current node name \"%s\"\n", node_name); break; default: @@ -284,6 +295,7 @@ static int xml_parser_get_node (xml_node_t *current_node, char *root_name, int r case (T_M_STOP_2): /* new leaf */ /* new subtree */ + new_leaf: subtree = new_xml_node(); /* set node name */ @@ -305,11 +317,12 @@ static int xml_parser_get_node (xml_node_t *current_node, char *root_name, int r break; case (T_IDENT): /* save property name */ + new_prop: if (xml_parser_mode == XML_PARSER_CASE_INSENSITIVE) { strtoupper(tok); } strcpy(property_name, tok); - state = STATE_ATTRIBUTE_EQUALS; + state = Q_STATE(ATTRIBUTE, ATTRIBUTE_EQUALS); lprintf("info: current property name \"%s\"\n", property_name); break; default: @@ -319,6 +332,23 @@ static int xml_parser_get_node (xml_node_t *current_node, char *root_name, int r } break; + case STATE_Q_ATTRIBUTE: + 
switch (res) { + case (T_EOL): + case (T_SEPAR): + /* nothing */ + break; + case (T_TI_STOP): + goto new_leaf; + case (T_IDENT): + goto new_prop; + default: + lprintf("error: unexpected token \"%s\", state %d\n", tok, state); + return -1; + break; + } + break; + case STATE_NODE_CLOSE: switch (res) { case (T_IDENT): @@ -386,10 +416,46 @@ static int xml_parser_get_node (xml_node_t *current_node, char *root_name, int r return -1; break; } + break; + + /* = or ?> or ident or separator expected */ + case STATE_Q_ATTRIBUTE_EQUALS: + switch (res) { + case (T_EOL): + case (T_SEPAR): + /* do nothing */ + break; + case (T_EQUAL): + state = STATE_Q_STRING; + break; + case (T_IDENT): + bypass_get_token = 1; /* jump to state 2 without get a new token */ + state = STATE_Q_ATTRIBUTE; + break; + case (T_TI_STOP): + /* add a new property without value */ + if (current_property == NULL) { + properties = new_xml_property(); + current_property = properties; + } else { + current_property->next = new_xml_property(); + current_property = current_property->next; + } + current_property->name = strdup (property_name); + lprintf("info: new property %s\n", current_property->name); + bypass_get_token = 1; /* jump to state 2 without get a new token */ + state = STATE_Q_ATTRIBUTE; + break; + default: + lprintf("error: unexpected token \"%s\", state %d\n", tok, state); + return -1; + break; + } break; /* string or ident or separator expected */ case STATE_STRING: + case STATE_Q_STRING: switch (res) { case (T_EOL): case (T_SEPAR): @@ -408,7 +474,7 @@ static int xml_parser_get_node (xml_node_t *current_node, char *root_name, int r current_property->name = strdup(property_name); current_property->value = lexer_decode_entities(tok); lprintf("info: new property %s=%s\n", current_property->name, current_property->value); - state = STATE_ATTRIBUTE; + state = Q_STATE(STRING, ATTRIBUTE); break; default: lprintf("error: unexpected token \"%s\", state %d\n", tok, state); @@ -423,18 +489,6 @@ static int 
xml_parser_get_node (xml_node_t *current_node, char *root_name, int r case (T_C_STOP): state = STATE_IDLE; break; - default: - state = STATE_COMMENT; - break; - } - break; - - /* ?> expected */ - case STATE_Q_NODE: - switch (res) { - case (T_TI_STOP): - state = 0; - break; default: break; } @@ -468,13 +522,25 @@ static int xml_parser_get_node (xml_node_t *current_node, char *root_name, int r } int xml_parser_build_tree(xml_node_t **root_node) { - xml_node_t *tmp_node; + xml_node_t *tmp_node, *pri_node, *q_node = NULL; int res; tmp_node = new_xml_node(); res = xml_parser_get_node(tmp_node, "", 0); - if ((tmp_node->child) && (!tmp_node->child->next)) { - *root_node = tmp_node->child; + + /* find first non- node */; + for (pri_node = tmp_node->child; + pri_node && pri_node->name[0] == '?'; + pri_node = pri_node->next) + q_node = pri_node; /* last node (eventually), or NULL */ + + if (pri_node && !pri_node->next) { + /* move the tail to the head (for compatibility reasons) */ + if (q_node) { + pri_node->next = tmp_node->child; + q_node->next = NULL; + } + *root_node = pri_node; free_xml_node(tmp_node); res = 0; } else { @@ -603,5 +669,8 @@ static void xml_parser_dump_node (const xml_node_t *node, int indent) { } void xml_parser_dump_tree (const xml_node_t *node) { - xml_parser_dump_node (node, 0); + do { + xml_parser_dump_node (node, 0); + node = node->next; + } while (node); } -- cgit v1.2.3 From 559008cadf0bc3457696e145b0abe764132fe92d Mon Sep 17 00:00:00 2001 From: Darren Salt Date: Sun, 22 Apr 2007 00:13:46 +0100 Subject: Don't drop elements with missing close tags. 
--- src/xine-utils/xmlparser.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'src/xine-utils/xmlparser.c') diff --git a/src/xine-utils/xmlparser.c b/src/xine-utils/xmlparser.c index 82e1f0ecd..7140bea5d 100644 --- a/src/xine-utils/xmlparser.c +++ b/src/xine-utils/xmlparser.c @@ -256,11 +256,7 @@ static int xml_parser_get_node_internal (xml_node_t *current_node, char *root_na lprintf("info: rec %d new subtree %s\n", rec, node_name); root_names[rec + 1] = node_name; parse_res = xml_parser_get_node_internal(subtree, root_names, rec + 1, relaxed); - if (parse_res < -1) { - /* badly-formed XML (missing close tag) */ - return parse_res + 1 + (parse_res == -2); - } - if (parse_res != 0) { + if (parse_res == -1 || parse_res > 0) { return parse_res; } if (current_subtree == NULL) { @@ -270,6 +266,10 @@ static int xml_parser_get_node_internal (xml_node_t *current_node, char *root_na current_subtree->next = subtree; current_subtree = subtree; } + if (parse_res < -1) { + /* badly-formed XML (missing close tag) */ + return parse_res + 1 + (parse_res == -2); + } state = STATE_IDLE; break; case (T_M_STOP_2): -- cgit v1.2.3 From 0ba64cf66fef7411ae2626027c7dfa4a00d4f432 Mon Sep 17 00:00:00 2001 From: Darren Salt Date: Mon, 23 Apr 2007 23:49:00 +0100 Subject: Kill a "may be used uninitialised" warning. --- src/xine-utils/xmlparser.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/xine-utils/xmlparser.c') diff --git a/src/xine-utils/xmlparser.c b/src/xine-utils/xmlparser.c index 93217637d..a63ac39d6 100644 --- a/src/xine-utils/xmlparser.c +++ b/src/xine-utils/xmlparser.c @@ -566,7 +566,7 @@ static int xml_parser_get_node (xml_node_t *current_node, int relaxed) } int xml_parser_build_tree_relaxed(xml_node_t **root_node, int relaxed) { - xml_node_t *tmp_node, *pri_node, *q_node; + xml_node_t *tmp_node, *pri_node, *q_node = NULL; int res; tmp_node = new_xml_node(); -- cgit v1.2.3