diff options
-rw-r--r-- | ChangeLog | 3 | ||||
-rw-r--r-- | src/xine-utils/xmllexer.c | 85 | ||||
-rw-r--r-- | src/xine-utils/xmlparser.c | 219 | ||||
-rw-r--r-- | src/xine-utils/xmlparser.h | 1 |
4 files changed, 232 insertions, 76 deletions
@@ -34,6 +34,9 @@ xine-lib (1.1.90) (Unreleased) consider alternative daemons. Note: aRTs will not be present in KDE 4. * Convert the FAQ and the Hacker's Guide from DocBook SGML to DocBook 4.4 XML, and the figures from fig files to SVG files. + * The XML parser no longer ignores <?...?> elements. + Such elements are handled as if they were after the XML content for + compatibility reasons, though they must appear before it. xine-lib (1.1.8) (Unreleased) * Send a channel-changed event to the frontend when receiving the SYNC diff --git a/src/xine-utils/xmllexer.c b/src/xine-utils/xmllexer.c index 754a006f9..028a41673 100644 --- a/src/xine-utils/xmllexer.c +++ b/src/xine-utils/xmllexer.c @@ -61,9 +61,27 @@ void lexer_init(const char * buf, int size) { lprintf("buffer length %d\n", size); } +typedef enum { + STATE_UNKNOWN = -1, + STATE_IDLE, + STATE_EOL, + STATE_SEPAR, + STATE_T_M_START, + STATE_T_M_STOP_1, + STATE_T_M_STOP_2, + STATE_T_EQUAL, + STATE_T_STRING_SINGLE, + STATE_T_STRING_DOUBLE, + STATE_T_COMMENT, + STATE_T_TI_STOP, + STATE_T_DASHDASH, + STATE_T_C_STOP, + STATE_IDENT /* must be last */ +} lexer_state_t; + int lexer_get_token(char * tok, int tok_size) { int tok_pos = 0; - int state = 0; + lexer_state_t state = STATE_IDLE; char c; if (tok) { @@ -75,69 +93,70 @@ int lexer_get_token(char * tok, int tok_size) { /* normal mode */ switch (state) { /* init state */ - case 0: + case STATE_IDLE: switch (c) { case '\n': case '\r': - state = 1; + state = STATE_EOL; tok[tok_pos] = c; tok_pos++; break; case ' ': case '\t': - state = 2; + state = STATE_SEPAR; tok[tok_pos] = c; tok_pos++; break; case '<': - state = 3; + state = STATE_T_M_START; tok[tok_pos] = c; tok_pos++; break; case '>': - state = 4; + state = STATE_T_M_STOP_1; tok[tok_pos] = c; tok_pos++; break; case '/': if (!in_comment) - state = 5; + state = STATE_T_M_STOP_2; tok[tok_pos] = c; tok_pos++; break; case '=': - state = 6; + state = STATE_T_EQUAL; tok[tok_pos] = c; tok_pos++; break; case '\"': /* " */ - state = 7; + state = STATE_T_STRING_DOUBLE; break; case '\'': /* " */ - state = 12; + state = STATE_T_STRING_SINGLE; break; case '-': - state = 10; + state = STATE_T_DASHDASH; tok[tok_pos] = c; tok_pos++; break; case '?': - state = 9; + if (!in_comment) + state = STATE_T_TI_STOP; tok[tok_pos] = c; tok_pos++; break; default: - state = 100; + state = STATE_IDENT; tok[tok_pos] = c; tok_pos++; break; @@ -146,7 +165,7 @@ int lexer_get_token(char * tok, int tok_size) { break; /* end of line */ - case 1: + case STATE_EOL: if (c == '\n' || (c == '\r')) { tok[tok_pos] = c; lexbuf_pos++; @@ -158,7 +177,7 @@ int lexer_get_token(char * tok, int tok_size) { break; /* T_SEPAR */ - case 2: + case STATE_SEPAR: if (c == ' ' || (c == '\t')) { tok[tok_pos] = c; lexbuf_pos++; @@ -170,7 +189,7 @@ int lexer_get_token(char * tok, int tok_size) { break; /* T_M_START < or </ or <! or <? */ - case 3: + case STATE_T_M_START: switch (c) { case '/': tok[tok_pos] = c; @@ -183,7 +202,7 @@ int lexer_get_token(char * tok, int tok_size) { tok[tok_pos] = c; lexbuf_pos++; tok_pos++; - state = 8; + state = STATE_T_COMMENT; break; case '?': tok[tok_pos] = c; @@ -199,7 +218,7 @@ int lexer_get_token(char * tok, int tok_size) { break; /* T_M_STOP_1 */ - case 4: + case STATE_T_M_STOP_1: tok[tok_pos] = '\0'; if (!in_comment) lex_mode = DATA; @@ -207,7 +226,7 @@ int lexer_get_token(char * tok, int tok_size) { break; /* T_M_STOP_2 */ - case 5: + case STATE_T_M_STOP_2: if (c == '>') { tok[tok_pos] = c; lexbuf_pos++; @@ -223,13 +242,13 @@ int lexer_get_token(char * tok, int tok_size) { break; /* T_EQUAL */ - case 6: + case STATE_T_EQUAL: tok[tok_pos] = '\0'; return T_EQUAL; break; /* T_STRING */ - case 7: + case STATE_T_STRING_DOUBLE: tok[tok_pos] = c; lexbuf_pos++; if (c == '\"') { /* " */ @@ -240,7 +259,7 @@ int lexer_get_token(char * tok, int tok_size) { break; /* T_C_START or T_DOCTYPE_START */ - case 8: + case STATE_T_COMMENT: switch (c) { case '-': lexbuf_pos++; @@ -271,12 +290,14 @@ int lexer_get_token(char * tok, int tok_size) { break; /* T_TI_STOP */ - case 9: + case STATE_T_TI_STOP: if (c == '>') { tok[tok_pos] = c; lexbuf_pos++; tok_pos++; /* FIXME */ tok[tok_pos] = '\0'; + if (!in_comment) + lex_mode = DATA; return T_TI_STOP; } else { tok[tok_pos] = '\0'; @@ -285,24 +306,24 @@ int lexer_get_token(char * tok, int tok_size) { break; /* -- */ - case 10: + case STATE_T_DASHDASH: switch (c) { case '-': tok[tok_pos] = c; tok_pos++; lexbuf_pos++; - state = 11; + state = STATE_T_C_STOP; break; default: tok[tok_pos] = c; tok_pos++; lexbuf_pos++; - state = 100; + state = STATE_IDENT; } break; /* --> */ - case 11: + case STATE_T_C_STOP: switch (c) { case '>': tok[tok_pos] = c; @@ -322,12 +343,12 @@ int lexer_get_token(char * tok, int tok_size) { tok[tok_pos] = c; tok_pos++; lexbuf_pos++; - state = 100; + state = STATE_IDENT; } break; /* T_STRING (single quotes) */ - case 12: + case STATE_T_STRING_SINGLE: tok[tok_pos] = c; lexbuf_pos++; if (c == '\'') { /* " */ @@ -338,7 +359,7 @@ int lexer_get_token(char * tok, int tok_size) { break; /* IDENT */ - case 100: + case STATE_IDENT: switch (c) { case '<': case '>': @@ -355,13 +376,13 @@ int lexer_get_token(char * tok, int tok_size) { tok[tok_pos] = c; tok_pos++; lexbuf_pos++; - state = 9; + state = STATE_T_TI_STOP; break; case '-': tok[tok_pos] = c; tok_pos++; lexbuf_pos++; - state = 10; + state = STATE_T_DASHDASH; break; default: tok[tok_pos] = c; diff --git a/src/xine-utils/xmlparser.c b/src/xine-utils/xmlparser.c index 363c6381f..a63ac39d6 100644 --- a/src/xine-utils/xmlparser.c +++ b/src/xine-utils/xmlparser.c @@ -152,18 +152,41 @@ void xml_parser_free_tree(xml_node_t *current_node) { xml_parser_free_tree_rec(current_node, 1); } -#define STATE_IDLE 0 -#define STATE_NODE 1 -#define STATE_COMMENT 7 - -static int xml_parser_get_node (xml_node_t *current_node, char *root_name, int rec) { +typedef enum { + /*0*/ + STATE_IDLE, + /* <foo ...> */ + STATE_NODE, + STATE_ATTRIBUTE, + STATE_NODE_CLOSE, + STATE_TAG_TERM, + STATE_ATTRIBUTE_EQUALS, + STATE_STRING, + STATE_TAG_TERM_IGNORE, + /* <?foo ...?> */ + STATE_Q_NODE, + STATE_Q_ATTRIBUTE, + STATE_Q_NODE_CLOSE, + STATE_Q_TAG_TERM, + STATE_Q_ATTRIBUTE_EQUALS, + STATE_Q_STRING, + /* Others */ + STATE_COMMENT, + STATE_DOCTYPE, +} parser_state_t; + +#define Q_STATE(CURRENT,NEW) (STATE_##NEW + state - STATE_##CURRENT) + +static int xml_parser_get_node_internal (xml_node_t *current_node, char *root_names[], int rec, int relaxed) +{ char tok[TOKEN_SIZE]; char property_name[TOKEN_SIZE]; char node_name[TOKEN_SIZE]; - int state = STATE_IDLE; + parser_state_t state = STATE_IDLE; int res = 0; int parse_res; int bypass_get_token = 0; + int retval = 0; /* used when state==4; non-0 if there are missing </...> */ xml_node_t *subtree = NULL; xml_node_t *current_subtree = NULL; xml_property_t *current_property = NULL; @@ -183,22 +206,22 @@ static int xml_parser_get_node (xml_node_t *current_node, char *root_name, int r /* do nothing */ break; case (T_EOF): - return 0; /* normal end */ + return retval; /* normal end */ break; case (T_M_START_1): state = STATE_NODE; break; case (T_M_START_2): - state = 3; + state = STATE_NODE_CLOSE; break; case (T_C_START): state = STATE_COMMENT; break; case (T_TI_START): - state = 8; + state = STATE_Q_NODE; break; case (T_DOCTYPE_START): - state = 9; + state = STATE_DOCTYPE; break; case (T_DATA): /* current data */ @@ -217,6 +240,7 @@ static int xml_parser_get_node (xml_node_t *current_node, char *root_name, int r break; case STATE_NODE: + case STATE_Q_NODE: switch (res) { case (T_IDENT): properties = NULL; @@ -226,8 +250,13 @@ static int xml_parser_get_node (xml_node_t *current_node, char *root_name, int r if (xml_parser_mode == XML_PARSER_CASE_INSENSITIVE) { strtoupper(tok); } - strcpy(node_name, tok); - state = 2; + if (state == STATE_Q_NODE) { + snprintf (node_name, TOKEN_SIZE, "?%s", tok); + state = STATE_Q_ATTRIBUTE; + } else { + strcpy(node_name, tok); + state = STATE_ATTRIBUTE; + } lprintf("info: current node name \"%s\"\n", node_name); break; default: @@ -236,7 +265,8 @@ static int xml_parser_get_node (xml_node_t *current_node, char *root_name, int r break; } break; - case 2: + + case STATE_ATTRIBUTE: switch (res) { case (T_EOL): case (T_SEPAR): @@ -252,8 +282,9 @@ static int xml_parser_get_node (xml_node_t *current_node, char *root_name, int r /* set node propertys */ subtree->props = properties; lprintf("info: rec %d new subtree %s\n", rec, node_name); - parse_res = xml_parser_get_node(subtree, node_name, rec + 1); - if (parse_res != 0) { + root_names[rec + 1] = node_name; + parse_res = xml_parser_get_node_internal(subtree, root_names, rec + 1, relaxed); + if (parse_res == -1 || parse_res > 0) { return parse_res; } if (current_subtree == NULL) { @@ -263,11 +294,16 @@ static int xml_parser_get_node (xml_node_t *current_node, char *root_name, int r current_subtree->next = subtree; current_subtree = subtree; } + if (parse_res < -1) { + /* badly-formed XML (missing close tag) */ + return parse_res + 1 + (parse_res == -2); + } state = STATE_IDLE; break; case (T_M_STOP_2): /* new leaf */ /* new subtree */ + new_leaf: subtree = new_xml_node(); /* set node name */ @@ -289,11 +325,12 @@ static int xml_parser_get_node (xml_node_t *current_node, char *root_name, int r break; case (T_IDENT): /* save property name */ + new_prop: if (xml_parser_mode == XML_PARSER_CASE_INSENSITIVE) { strtoupper(tok); } strcpy(property_name, tok); - state = 5; + state = Q_STATE(ATTRIBUTE, ATTRIBUTE_EQUALS); lprintf("info: current property name \"%s\"\n", property_name); break; default: @@ -303,17 +340,50 @@ static int xml_parser_get_node (xml_node_t *current_node, char *root_name, int r } break; - case 3: + case STATE_Q_ATTRIBUTE: + switch (res) { + case (T_EOL): + case (T_SEPAR): + /* nothing */ + break; + case (T_TI_STOP): + goto new_leaf; + case (T_IDENT): + goto new_prop; + default: + lprintf("error: unexpected token \"%s\", state %d\n", tok, state); + return -1; + break; + } + break; + + case STATE_NODE_CLOSE: switch (res) { case (T_IDENT): /* must be equal to root_name */ if (xml_parser_mode == XML_PARSER_CASE_INSENSITIVE) { strtoupper(tok); } - if (strcmp(tok, root_name) == 0) { - state = 4; - } else { - lprintf("error: xml struct, tok=%s, waited_tok=%s\n", tok, root_name); + if (strcmp(tok, root_names[rec]) == 0) { + state = STATE_TAG_TERM; + } else if (relaxed) { + int r = rec; + while (--r >= 0) + if (strcmp(tok, root_names[r]) == 0) { + lprintf("warning: wanted %s, got %s - assuming missing close tags\n", root_names[rec], tok); + retval = r - rec - 1; /* -1 - (no. of implied close tags) */ + state = STATE_TAG_TERM; + break; + } + /* relaxed parsing, ignoring extra close tag (but we don't handle out-of-order) */ + if (r < 0) { + lprintf("warning: extra close tag %s - ignoring\n", tok); + state = STATE_TAG_TERM_IGNORE; + } + } + else + { + lprintf("error: xml struct, tok=%s, waited_tok=%s\n", tok, root_names[rec]); return -1; } break; @@ -325,10 +395,10 @@ static int xml_parser_get_node (xml_node_t *current_node, char *root_name, int r break; /* > expected */ - case 4: + case STATE_TAG_TERM: switch (res) { case (T_M_STOP_1): - return 0; + return retval; break; default: lprintf("error: unexpected token \"%s\", state %d\n", tok, state); @@ -338,18 +408,18 @@ static int xml_parser_get_node (xml_node_t *current_node, char *root_name, int r break; /* = or > or ident or separator expected */ - case 5: + case STATE_ATTRIBUTE_EQUALS: switch (res) { case (T_EOL): case (T_SEPAR): /* do nothing */ break; case (T_EQUAL): - state = 6; + state = STATE_STRING; break; case (T_IDENT): bypass_get_token = 1; /* jump to state 2 without get a new token */ - state = 2; + state = STATE_ATTRIBUTE; break; case (T_M_STOP_1): /* add a new property without value */ @@ -363,7 +433,42 @@ static int xml_parser_get_node (xml_node_t *current_node, char *root_name, int r current_property->name = strdup (property_name); lprintf("info: new property %s\n", current_property->name); bypass_get_token = 1; /* jump to state 2 without get a new token */ - state = 2; + state = STATE_ATTRIBUTE; + break; + default: + lprintf("error: unexpected token \"%s\", state %d\n", tok, state); + return -1; + break; + } + break; + + /* = or ?> or ident or separator expected */ + case STATE_Q_ATTRIBUTE_EQUALS: + switch (res) { + case (T_EOL): + case (T_SEPAR): + /* do nothing */ + break; + case (T_EQUAL): + state = STATE_Q_STRING; + break; + case (T_IDENT): + bypass_get_token = 1; /* jump to state 2 without get a new token */ + state = STATE_Q_ATTRIBUTE; + break; + case (T_TI_STOP): + /* add a new property without value */ + if (current_property == NULL) { + properties = new_xml_property(); + current_property = properties; + } else { + current_property->next = new_xml_property(); + current_property = current_property->next; + } + current_property->name = strdup (property_name); + lprintf("info: new property %s\n", current_property->name); + bypass_get_token = 1; /* jump to state 2 without get a new token */ + state = STATE_Q_ATTRIBUTE; break; default: lprintf("error: unexpected token \"%s\", state %d\n", tok, state); @@ -373,7 +478,8 @@ static int xml_parser_get_node (xml_node_t *current_node, char *root_name, int r break; /* string or ident or separator expected */ - case 6: + case STATE_STRING: + case STATE_Q_STRING: switch (res) { case (T_EOL): case (T_SEPAR): @@ -392,7 +498,7 @@ static int xml_parser_get_node (xml_node_t *current_node, char *root_name, int r current_property->name = strdup(property_name); current_property->value = lexer_decode_entities(tok); lprintf("info: new property %s=%s\n", current_property->name, current_property->value); - state = 2; + state = Q_STATE(STRING, ATTRIBUTE); break; default: lprintf("error: unexpected token \"%s\", state %d\n", tok, state); @@ -408,31 +514,30 @@ static int xml_parser_get_node (xml_node_t *current_node, char *root_name, int r state = STATE_IDLE; break; default: - state = STATE_COMMENT; break; } break; - /* ?> expected */ - case 8: + /* > expected */ + case STATE_DOCTYPE: switch (res) { - case (T_TI_STOP): + case (T_M_STOP_1): state = 0; break; default: - state = 8; break; } break; - /* > expected */ - case 9: + /* > expected (following unmatched "</...") */ + case STATE_TAG_TERM_IGNORE: switch (res) { case (T_M_STOP_1): - state = 0; + state = STATE_IDLE; break; default: - state = 9; + lprintf("error: unexpected token \"%s\", state %d\n", tok, state); + return -1; break; } break; @@ -453,14 +558,33 @@ static int xml_parser_get_node (xml_node_t *current_node, char *root_name, int r } } -int xml_parser_build_tree(xml_node_t **root_node) { - xml_node_t *tmp_node; +static int xml_parser_get_node (xml_node_t *current_node, int relaxed) +{ + char *root_names[MAX_RECURSION + 1]; + root_names[0] = ""; + return xml_parser_get_node_internal (current_node, root_names, 0, relaxed); +} + +int xml_parser_build_tree_relaxed(xml_node_t **root_node, int relaxed) { + xml_node_t *tmp_node, *pri_node, *q_node = NULL; int res; tmp_node = new_xml_node(); - res = xml_parser_get_node(tmp_node, "", 0); - if ((tmp_node->child) && (!tmp_node->child->next)) { - *root_node = tmp_node->child; + res = xml_parser_get_node(tmp_node, relaxed); + + /* find first non-<?...?> node */; + for (pri_node = tmp_node->child; + pri_node && pri_node->name[0] == '?'; + pri_node = pri_node->next) + q_node = pri_node; /* last <?...?> node (eventually), or NULL */ + + if (pri_node && !pri_node->next) { + /* move the tail to the head (for compatibility reasons) */ + if (q_node) { + pri_node->next = tmp_node->child; + q_node->next = NULL; + } + *root_node = pri_node; free_xml_node(tmp_node); res = 0; } else { @@ -471,6 +595,10 @@ int xml_parser_build_tree(xml_node_t **root_node) { return res; } +int xml_parser_build_tree(xml_node_t **root_node) { + return xml_parser_build_tree_relaxed (root_node, 0); +} + const char *xml_parser_get_property (const xml_node_t *node, const char *name) { xml_property_t *prop; @@ -589,5 +717,8 @@ static void xml_parser_dump_node (const xml_node_t *node, int indent) { } void xml_parser_dump_tree (const xml_node_t *node) { - xml_parser_dump_node (node, 0); + do { + xml_parser_dump_node (node, 0); + node = node->next; + } while (node); } diff --git a/src/xine-utils/xmlparser.h b/src/xine-utils/xmlparser.h index f202ca28d..8d4915abf 100644 --- a/src/xine-utils/xmlparser.h +++ b/src/xine-utils/xmlparser.h @@ -57,6 +57,7 @@ typedef struct xml_node_s { void xml_parser_init(const char * buf, int size, int mode) XINE_PROTECTED; int xml_parser_build_tree(xml_node_t **root_node) XINE_PROTECTED; +int xml_parser_build_tree_relaxed(xml_node_t **root_node, int relaxed) XINE_PROTECTED; void xml_parser_free_tree(xml_node_t *root_node) XINE_PROTECTED; |