From 5b1c15d5ffb5509eddd4408e2e4495f1e5e43162 Mon Sep 17 00:00:00 2001 From: Darren Salt Date: Fri, 14 Sep 2007 20:30:04 +0100 Subject: Handle <[CDATA[...]]>. (Based on a patch by Bastien Nocera .) --- src/xine-utils/xmllexer.c | 57 ++++++++++++++++++++++++++++++++++++++-------- src/xine-utils/xmllexer.h | 2 ++ src/xine-utils/xmlparser.c | 29 ++++++++++++++++++++++- 3 files changed, 77 insertions(+), 11 deletions(-) diff --git a/src/xine-utils/xmllexer.c b/src/xine-utils/xmllexer.c index 028a41673..25b0c2a08 100644 --- a/src/xine-utils/xmllexer.c +++ b/src/xine-utils/xmllexer.c @@ -31,7 +31,7 @@ #ifdef XINE_COMPILE #include "xineutils.h" #else -#define lprintf(...) +#define lprintf printf #define xine_xmalloc malloc #endif #include "xmllexer.h" @@ -41,16 +41,19 @@ #include /* private constants*/ -#define NORMAL 0 /* normal lex mode */ -#define DATA 1 /* data lex mode */ /* private global variables */ static const char * lexbuf; static int lexbuf_size = 0; static int lexbuf_pos = 0; -static int lex_mode = NORMAL; static int in_comment = 0; +static enum { + NORMAL, + DATA, + CDATA, +} lex_mode = NORMAL; + void lexer_init(const char * buf, int size) { lexbuf = buf; lexbuf_size = size; @@ -87,10 +90,10 @@ int lexer_get_token(char * tok, int tok_size) { if (tok) { while ((tok_pos < tok_size) && (lexbuf_pos < lexbuf_size)) { c = lexbuf[lexbuf_pos]; - lprintf("c=%c, state=%d, in_comment=%d\n", c, state, in_comment); + lprintf("c=%c, state=%d, lex_mode=%d, in_comment=%d\n", c, state, lex_mode, in_comment); - if (lex_mode == NORMAL) { - /* normal mode */ + switch (lex_mode) { + case NORMAL: switch (state) { /* init state */ case STATE_IDLE: @@ -258,7 +261,7 @@ int lexer_get_token(char * tok, int tok_size) { tok_pos++; break; - /* T_C_START or T_DOCTYPE_START */ + /* T_C_START or T_DOCTYPE_START or T_CDATA_START */ case STATE_T_COMMENT: switch (c) { case '-': @@ -283,6 +286,17 @@ int lexer_get_token(char * tok, int tok_size) { return T_ERROR; } break; + case '[': + 
lexbuf_pos++; + if (strncmp(lexbuf + lexbuf_pos, "CDATA[", 6) == 0) { + strncpy (tok + tok_pos, "[CDATA[", 7); /* FIXME */ + lexbuf_pos += 6; + lex_mode = CDATA; + return T_CDATA_START; + } else{ + return T_ERROR; + } + break; default: /* error */ return T_ERROR; @@ -394,8 +408,9 @@ int lexer_get_token(char * tok, int tok_size) { lprintf("expected char \'%c\'\n", tok[tok_pos - 1]); /* FIX ME */ return T_ERROR; } - } else { - /* data mode, stop if char equal '<' */ + break; + + case DATA: /* data mode, stop if char equal '<' */ switch (c) { case '<': @@ -407,6 +422,28 @@ int lexer_get_token(char * tok, int tok_size) { tok_pos++; lexbuf_pos++; } + break; + + case CDATA: /* cdata mode, stop if next token is "]]>" */ + switch (c) + { + case ']': + if (strncmp(lexbuf + lexbuf_pos, "]]>", 3) == 0) { + lexbuf_pos += 3; + lex_mode = DATA; + return T_CDATA_STOP; + } else { + tok[tok_pos] = c; + tok_pos++; + lexbuf_pos++; + } + break; + default: + tok[tok_pos] = c; + tok_pos++; + lexbuf_pos++; + } + break; } } lprintf ("loop done tok_pos = %d, tok_size=%d, lexbuf_pos=%d, lexbuf_size=%d\n", diff --git a/src/xine-utils/xmllexer.h b/src/xine-utils/xmllexer.h index 5a217fcd8..524049a44 100644 --- a/src/xine-utils/xmllexer.h +++ b/src/xine-utils/xmllexer.h @@ -50,6 +50,8 @@ #define T_TI_STOP 15 /* ?> */ #define T_DOCTYPE_START 16 /* <!DOCTYPE */ #define T_DOCTYPE_STOP 17 /* > */ +#define T_CDATA_START 18 /* <![CDATA[ */ +#define T_CDATA_STOP 19 /* ]]> */ /* public functions */ diff --git a/src/xine-utils/xmlparser.c b/src/xine-utils/xmlparser.c index a9fa940db..326e21997 100644 --- a/src/xine-utils/xmlparser.c +++ b/src/xine-utils/xmlparser.c @@ -42,7 +42,7 @@ #ifdef XINE_COMPILE #include "xineutils.h" #else -#define lprintf(...) 
+#define lprintf printf #define xine_xmalloc malloc #endif #include "xmllexer.h" @@ -177,6 +177,7 @@ typedef enum { /* Others */ STATE_COMMENT, STATE_DOCTYPE, + STATE_CDATA, } parser_state_t; #define Q_STATE(CURRENT,NEW) (STATE_##NEW + state - STATE_##CURRENT) @@ -227,6 +228,9 @@ static int xml_parser_get_node_internal (xml_node_t *current_node, char *root_na case (T_DOCTYPE_START): state = STATE_DOCTYPE; break; + case (T_CDATA_START): + state = STATE_CDATA; + break; case (T_DATA): /* current data */ if (current_node->data) { @@ -536,6 +540,29 @@ static int xml_parser_get_node_internal (xml_node_t *current_node, char *root_na default: break; } + break; + + /* ]]> expected */ + case STATE_CDATA: + switch (res) { + case (T_CDATA_STOP): + if (current_node->data) { + /* Append to existing text. FIXME - should use a child node */ + char *data; + asprintf (&data, "%s%s", current_node->data, tok); + free (current_node->data); + current_node->data = data; + } + else + current_node->data = strdup (tok); + lprintf("info: node cdata : %s\n", current_node->data); + state = STATE_IDLE; + break; + default: + lprintf("error: unexpected token \"%s\", state %d\n", tok, state); + return -1; + break; + } break; /* > expected (following unmatched "