diff options
Diffstat (limited to 'src/xine-utils/xmllexer.c')
-rw-r--r-- | src/xine-utils/xmllexer.c | 161 |
1 files changed, 111 insertions, 50 deletions
diff --git a/src/xine-utils/xmllexer.c b/src/xine-utils/xmllexer.c index fb504f430..a3b395a46 100644 --- a/src/xine-utils/xmllexer.c +++ b/src/xine-utils/xmllexer.c @@ -30,11 +30,11 @@ #endif #ifdef XINE_COMPILE -#include "xineutils.h" +#include <xine/xineutils.h> #else #define lprintf(...) #endif -#include "xmllexer.h" +#include <xine/xmllexer.h> #include <stdio.h> #include <ctype.h> #include <string.h> @@ -46,8 +46,6 @@ #include "bswap.h" /* private constants*/ -#define NORMAL 0 /* normal lex mode */ -#define DATA 1 /* data lex mode */ /* private global variables */ struct lexer * static_lexer; @@ -98,6 +96,13 @@ void lexer_init(const char * buf, int size) { static_lexer = lexer_init_r(buf, size); } + +enum { + NORMAL, + DATA, + CDATA, +}; + struct lexer *lexer_init_r(const char * buf, int size) { static const char boms[] = { 0xFF, 0xFE, 0, 0, 0xFE, 0xFF }, bom_utf8[] = { 0xEF, 0xBB, 0xBF }; @@ -134,6 +139,24 @@ void lexer_finalize_r(struct lexer * lexer) free(lexer); } +typedef enum { + STATE_UNKNOWN = -1, + STATE_IDLE, + STATE_EOL, + STATE_SEPAR, + STATE_T_M_START, + STATE_T_M_STOP_1, + STATE_T_M_STOP_2, + STATE_T_EQUAL, + STATE_T_STRING_SINGLE, + STATE_T_STRING_DOUBLE, + STATE_T_COMMENT, + STATE_T_TI_STOP, + STATE_T_DASHDASH, + STATE_T_C_STOP, + STATE_IDENT /* must be last */ +} lexer_state_t; + /* for ABI compatibility */ int lexer_get_token_d(char ** _tok, int * _tok_size, int fixed) { return lexer_get_token_d_r(static_lexer, _tok, _tok_size, fixed); @@ -144,7 +167,7 @@ int lexer_get_token_d_r(struct lexer * lexer, char ** _tok, int * _tok_size, int int tok_size = *_tok_size; int tok_pos = 0; - int state = 0; + lexer_state_t state = STATE_IDLE; char c; if (tok) { @@ -152,73 +175,74 @@ int lexer_get_token_d_r(struct lexer * lexer, char ** _tok, int * _tok_size, int c = lexer->lexbuf[lexer->lexbuf_pos]; lprintf("c=%c, state=%d, in_comment=%d\n", c, state, lexer->in_comment); - if (lexer->lex_mode == NORMAL) { - /* normal mode */ + switch (lexer->lex_mode) { + case NORMAL: switch (state) { /* init state */ - case 0: + case STATE_IDLE: switch (c) { case '\n': case '\r': - state = 1; + state = STATE_EOL; tok[tok_pos] = c; tok_pos++; break; case ' ': case '\t': - state = 2; + state = STATE_SEPAR; tok[tok_pos] = c; tok_pos++; break; case '<': - state = 3; + state = STATE_T_M_START; tok[tok_pos] = c; tok_pos++; break; case '>': - state = 4; + state = STATE_T_M_STOP_1; tok[tok_pos] = c; tok_pos++; break; case '/': if (!lexer->in_comment) - state = 5; + state = STATE_T_M_STOP_2; tok[tok_pos] = c; tok_pos++; break; case '=': - state = 6; + state = STATE_T_EQUAL; tok[tok_pos] = c; tok_pos++; break; case '\"': /* " */ - state = 7; + state = STATE_T_STRING_DOUBLE; break; case '\'': /* " */ - state = 12; + state = STATE_T_STRING_SINGLE; break; case '-': - state = 10; + state = STATE_T_DASHDASH; tok[tok_pos] = c; tok_pos++; break; case '?': - state = 9; + if (!lexer->in_comment) + state = STATE_T_TI_STOP; tok[tok_pos] = c; tok_pos++; break; default: - state = 100; + state = STATE_IDENT; tok[tok_pos] = c; tok_pos++; break; @@ -227,7 +251,7 @@ int lexer_get_token_d_r(struct lexer * lexer, char ** _tok, int * _tok_size, int break; /* end of line */ - case 1: + case STATE_EOL: if (c == '\n' || (c == '\r')) { tok[tok_pos] = c; lexer->lexbuf_pos++; @@ -239,7 +263,7 @@ int lexer_get_token_d_r(struct lexer * lexer, char ** _tok, int * _tok_size, int break; /* T_SEPAR */ - case 2: + case STATE_SEPAR: if (c == ' ' || (c == '\t')) { tok[tok_pos] = c; lexer->lexbuf_pos++; @@ -251,7 +275,7 @@ int lexer_get_token_d_r(struct lexer * lexer, char ** _tok, int * _tok_size, int break; /* T_M_START < or </ or <! or <? */ - case 3: + case STATE_T_M_START: switch (c) { case '/': tok[tok_pos] = c; @@ -264,7 +288,7 @@ int lexer_get_token_d_r(struct lexer * lexer, char ** _tok, int * _tok_size, int tok[tok_pos] = c; lexer->lexbuf_pos++; tok_pos++; - state = 8; + state = STATE_T_COMMENT; break; case '?': tok[tok_pos] = c; @@ -280,7 +304,7 @@ int lexer_get_token_d_r(struct lexer * lexer, char ** _tok, int * _tok_size, int break; /* T_M_STOP_1 */ - case 4: + case STATE_T_M_STOP_1: tok[tok_pos] = '\0'; if (!lexer->in_comment) lexer->lex_mode = DATA; @@ -288,7 +312,7 @@ int lexer_get_token_d_r(struct lexer * lexer, char ** _tok, int * _tok_size, int break; /* T_M_STOP_2 */ - case 5: + case STATE_T_M_STOP_2: if (c == '>') { tok[tok_pos] = c; lexer->lexbuf_pos++; @@ -304,13 +328,13 @@ int lexer_get_token_d_r(struct lexer * lexer, char ** _tok, int * _tok_size, int break; /* T_EQUAL */ - case 6: + case STATE_T_EQUAL: tok[tok_pos] = '\0'; return T_EQUAL; break; /* T_STRING */ - case 7: + case STATE_T_STRING_DOUBLE: tok[tok_pos] = c; lexer->lexbuf_pos++; if (c == '\"') { /* " */ @@ -320,8 +344,8 @@ int lexer_get_token_d_r(struct lexer * lexer, char ** _tok, int * _tok_size, int tok_pos++; break; - /* T_C_START or T_DOCTYPE_START */ - case 8: + /* T_C_START or T_DOCTYPE_START or T_CDATA_START */ + case STATE_T_COMMENT: switch (c) { case '-': lexer->lexbuf_pos++; @@ -345,6 +369,17 @@ int lexer_get_token_d_r(struct lexer * lexer, char ** _tok, int * _tok_size, int return T_ERROR; } break; + case '[': + lexer->lexbuf_pos++; + if (strncmp(lexer->lexbuf + lexer->lexbuf_pos, "CDATA[", 6) == 0) { + strncpy (tok + tok_pos, "[CDATA[", 7); /* FIXME */ + lexer->lexbuf_pos += 6; + lexer->lex_mode = CDATA; + return T_CDATA_START; + } else{ + return T_ERROR; + } + break; default: /* error */ return T_ERROR; @@ -352,12 +387,14 @@ int lexer_get_token_d_r(struct lexer * lexer, char ** _tok, int * _tok_size, int break; /* T_TI_STOP */ - case 9: + case STATE_T_TI_STOP: if (c == '>') { tok[tok_pos] = c; lexer->lexbuf_pos++; tok_pos++; /* FIXME */ tok[tok_pos] = '\0'; + if (!lexer->in_comment) + lexer->lex_mode = DATA; return T_TI_STOP; } else { tok[tok_pos] = '\0'; @@ -366,24 +403,24 @@ int lexer_get_token_d_r(struct lexer * lexer, char ** _tok, int * _tok_size, int break; /* -- */ - case 10: + case STATE_T_DASHDASH: switch (c) { case '-': tok[tok_pos] = c; tok_pos++; lexer->lexbuf_pos++; - state = 11; + state = STATE_T_C_STOP; break; default: tok[tok_pos] = c; tok_pos++; lexer->lexbuf_pos++; - state = 100; + state = STATE_IDENT; } break; /* --> */ - case 11: + case STATE_T_C_STOP: switch (c) { case '>': tok[tok_pos] = c; @@ -403,12 +440,12 @@ int lexer_get_token_d_r(struct lexer * lexer, char ** _tok, int * _tok_size, int tok[tok_pos] = c; tok_pos++; lexer->lexbuf_pos++; - state = 100; + state = STATE_IDENT; } break; /* T_STRING (single quotes) */ - case 12: + case STATE_T_STRING_SINGLE: tok[tok_pos] = c; lexer->lexbuf_pos++; if (c == '\'') { /* " */ @@ -419,7 +456,7 @@ int lexer_get_token_d_r(struct lexer * lexer, char ** _tok, int * _tok_size, int break; /* IDENT */ - case 100: + case STATE_IDENT: switch (c) { case '<': case '>': @@ -438,13 +475,13 @@ int lexer_get_token_d_r(struct lexer * lexer, char ** _tok, int * _tok_size, int tok[tok_pos] = c; tok_pos++; lexer->lexbuf_pos++; - state = 9; + state = STATE_T_TI_STOP; break; case '-': tok[tok_pos] = c; tok_pos++; lexer->lexbuf_pos++; - state = 10; + state = STATE_T_DASHDASH; break; default: tok[tok_pos] = c; @@ -456,8 +493,9 @@ int lexer_get_token_d_r(struct lexer * lexer, char ** _tok, int * _tok_size, int lprintf("expected char \'%c\'\n", tok[tok_pos - 1]); /* FIX ME */ return T_ERROR; } - } else { - /* data mode, stop if char equal '<' */ + break; + + case DATA: /* data mode, stop if char equal '<' */ switch (c) { case '<': @@ -469,6 +507,28 @@ int lexer_get_token_d_r(struct lexer * lexer, char ** _tok, int * _tok_size, int tok_pos++; lexer->lexbuf_pos++; } + break; + + case CDATA: /* cdata mode, stop if next token is "]]>" */ + switch (c) + { + case ']': + if (strncmp(lexer->lexbuf + lexer->lexbuf_pos, "]]>", 3) == 0) { + lexer->lexbuf_pos += 3; + lexer->lex_mode = DATA; + return T_CDATA_STOP; + } else { + tok[tok_pos] = c; + tok_pos++; + lexer->lexbuf_pos++; + } + break; + default: + tok[tok_pos] = c; + tok_pos++; + lexer->lexbuf_pos++; + } + break; } } lprintf ("loop done tok_pos = %d, tok_size=%d, lexbuf_pos=%d, lexbuf_size=%d\n", @@ -492,27 +552,28 @@ int lexer_get_token_d_r(struct lexer * lexer, char ** _tok, int * _tok_size, int /* Terminate the current token */ tok[tok_pos] = '\0'; switch (state) { - case 0: - case 1: - case 2: + case STATE_IDLE: + case STATE_EOL: + case STATE_SEPAR: return T_EOF; break; - case 3: + case STATE_T_M_START: return T_M_START_1; break; - case 4: + case STATE_T_M_STOP_1: return T_M_STOP_1; break; - case 5: + case STATE_T_M_STOP_2: return T_ERROR; break; - case 6: + case STATE_T_EQUAL: return T_EQUAL; break; - case 7: + case STATE_T_STRING_SINGLE: + case STATE_T_STRING_DOUBLE: return T_STRING; break; - case 100: + case STATE_IDENT: return T_DATA; break; default: |