summaryrefslogtreecommitdiff
path: root/src/xine-utils/xmllexer.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/xine-utils/xmllexer.c')
-rw-r--r--src/xine-utils/xmllexer.c161
1 files changed, 111 insertions, 50 deletions
diff --git a/src/xine-utils/xmllexer.c b/src/xine-utils/xmllexer.c
index fb504f430..a3b395a46 100644
--- a/src/xine-utils/xmllexer.c
+++ b/src/xine-utils/xmllexer.c
@@ -30,11 +30,11 @@
#endif
#ifdef XINE_COMPILE
-#include "xineutils.h"
+#include <xine/xineutils.h>
#else
#define lprintf(...)
#endif
-#include "xmllexer.h"
+#include <xine/xmllexer.h>
#include <stdio.h>
#include <ctype.h>
#include <string.h>
@@ -46,8 +46,6 @@
#include "bswap.h"
/* private constants*/
-#define NORMAL 0 /* normal lex mode */
-#define DATA 1 /* data lex mode */
/* private global variables */
struct lexer * static_lexer;
@@ -98,6 +96,13 @@ void lexer_init(const char * buf, int size) {
static_lexer = lexer_init_r(buf, size);
}
+
+enum {
+ NORMAL,
+ DATA,
+ CDATA,
+};
+
struct lexer *lexer_init_r(const char * buf, int size) {
static const char boms[] = { 0xFF, 0xFE, 0, 0, 0xFE, 0xFF },
bom_utf8[] = { 0xEF, 0xBB, 0xBF };
@@ -134,6 +139,24 @@ void lexer_finalize_r(struct lexer * lexer)
free(lexer);
}
+typedef enum {
+ STATE_UNKNOWN = -1,
+ STATE_IDLE,
+ STATE_EOL,
+ STATE_SEPAR,
+ STATE_T_M_START,
+ STATE_T_M_STOP_1,
+ STATE_T_M_STOP_2,
+ STATE_T_EQUAL,
+ STATE_T_STRING_SINGLE,
+ STATE_T_STRING_DOUBLE,
+ STATE_T_COMMENT,
+ STATE_T_TI_STOP,
+ STATE_T_DASHDASH,
+ STATE_T_C_STOP,
+ STATE_IDENT /* must be last */
+} lexer_state_t;
+
/* for ABI compatibility */
int lexer_get_token_d(char ** _tok, int * _tok_size, int fixed) {
return lexer_get_token_d_r(static_lexer, _tok, _tok_size, fixed);
@@ -144,7 +167,7 @@ int lexer_get_token_d_r(struct lexer * lexer, char ** _tok, int * _tok_size, int
int tok_size = *_tok_size;
int tok_pos = 0;
- int state = 0;
+ lexer_state_t state = STATE_IDLE;
char c;
if (tok) {
@@ -152,73 +175,74 @@ int lexer_get_token_d_r(struct lexer * lexer, char ** _tok, int * _tok_size, int
c = lexer->lexbuf[lexer->lexbuf_pos];
lprintf("c=%c, state=%d, in_comment=%d\n", c, state, lexer->in_comment);
- if (lexer->lex_mode == NORMAL) {
- /* normal mode */
+ switch (lexer->lex_mode) {
+ case NORMAL:
switch (state) {
/* init state */
- case 0:
+ case STATE_IDLE:
switch (c) {
case '\n':
case '\r':
- state = 1;
+ state = STATE_EOL;
tok[tok_pos] = c;
tok_pos++;
break;
case ' ':
case '\t':
- state = 2;
+ state = STATE_SEPAR;
tok[tok_pos] = c;
tok_pos++;
break;
case '<':
- state = 3;
+ state = STATE_T_M_START;
tok[tok_pos] = c;
tok_pos++;
break;
case '>':
- state = 4;
+ state = STATE_T_M_STOP_1;
tok[tok_pos] = c;
tok_pos++;
break;
case '/':
if (!lexer->in_comment)
- state = 5;
+ state = STATE_T_M_STOP_2;
tok[tok_pos] = c;
tok_pos++;
break;
case '=':
- state = 6;
+ state = STATE_T_EQUAL;
tok[tok_pos] = c;
tok_pos++;
break;
case '\"': /* " */
- state = 7;
+ state = STATE_T_STRING_DOUBLE;
break;
case '\'': /* " */
- state = 12;
+ state = STATE_T_STRING_SINGLE;
break;
case '-':
- state = 10;
+ state = STATE_T_DASHDASH;
tok[tok_pos] = c;
tok_pos++;
break;
case '?':
- state = 9;
+ if (!lexer->in_comment)
+ state = STATE_T_TI_STOP;
tok[tok_pos] = c;
tok_pos++;
break;
default:
- state = 100;
+ state = STATE_IDENT;
tok[tok_pos] = c;
tok_pos++;
break;
@@ -227,7 +251,7 @@ int lexer_get_token_d_r(struct lexer * lexer, char ** _tok, int * _tok_size, int
break;
/* end of line */
- case 1:
+ case STATE_EOL:
if (c == '\n' || (c == '\r')) {
tok[tok_pos] = c;
lexer->lexbuf_pos++;
@@ -239,7 +263,7 @@ int lexer_get_token_d_r(struct lexer * lexer, char ** _tok, int * _tok_size, int
break;
/* T_SEPAR */
- case 2:
+ case STATE_SEPAR:
if (c == ' ' || (c == '\t')) {
tok[tok_pos] = c;
lexer->lexbuf_pos++;
@@ -251,7 +275,7 @@ int lexer_get_token_d_r(struct lexer * lexer, char ** _tok, int * _tok_size, int
break;
/* T_M_START < or </ or <! or <? */
- case 3:
+ case STATE_T_M_START:
switch (c) {
case '/':
tok[tok_pos] = c;
@@ -264,7 +288,7 @@ int lexer_get_token_d_r(struct lexer * lexer, char ** _tok, int * _tok_size, int
tok[tok_pos] = c;
lexer->lexbuf_pos++;
tok_pos++;
- state = 8;
+ state = STATE_T_COMMENT;
break;
case '?':
tok[tok_pos] = c;
@@ -280,7 +304,7 @@ int lexer_get_token_d_r(struct lexer * lexer, char ** _tok, int * _tok_size, int
break;
/* T_M_STOP_1 */
- case 4:
+ case STATE_T_M_STOP_1:
tok[tok_pos] = '\0';
if (!lexer->in_comment)
lexer->lex_mode = DATA;
@@ -288,7 +312,7 @@ int lexer_get_token_d_r(struct lexer * lexer, char ** _tok, int * _tok_size, int
break;
/* T_M_STOP_2 */
- case 5:
+ case STATE_T_M_STOP_2:
if (c == '>') {
tok[tok_pos] = c;
lexer->lexbuf_pos++;
@@ -304,13 +328,13 @@ int lexer_get_token_d_r(struct lexer * lexer, char ** _tok, int * _tok_size, int
break;
/* T_EQUAL */
- case 6:
+ case STATE_T_EQUAL:
tok[tok_pos] = '\0';
return T_EQUAL;
break;
/* T_STRING */
- case 7:
+ case STATE_T_STRING_DOUBLE:
tok[tok_pos] = c;
lexer->lexbuf_pos++;
if (c == '\"') { /* " */
@@ -320,8 +344,8 @@ int lexer_get_token_d_r(struct lexer * lexer, char ** _tok, int * _tok_size, int
tok_pos++;
break;
- /* T_C_START or T_DOCTYPE_START */
- case 8:
+ /* T_C_START or T_DOCTYPE_START or T_CDATA_START */
+ case STATE_T_COMMENT:
switch (c) {
case '-':
lexer->lexbuf_pos++;
@@ -345,6 +369,17 @@ int lexer_get_token_d_r(struct lexer * lexer, char ** _tok, int * _tok_size, int
return T_ERROR;
}
break;
+ case '[':
+ lexer->lexbuf_pos++;
+ if (strncmp(lexer->lexbuf + lexer->lexbuf_pos, "CDATA[", 6) == 0) {
+ strncpy (tok + tok_pos, "[CDATA[", 7); /* FIXME */
+ lexer->lexbuf_pos += 6;
+ lexer->lex_mode = CDATA;
+ return T_CDATA_START;
+ } else{
+ return T_ERROR;
+ }
+ break;
default:
/* error */
return T_ERROR;
@@ -352,12 +387,14 @@ int lexer_get_token_d_r(struct lexer * lexer, char ** _tok, int * _tok_size, int
break;
/* T_TI_STOP */
- case 9:
+ case STATE_T_TI_STOP:
if (c == '>') {
tok[tok_pos] = c;
lexer->lexbuf_pos++;
tok_pos++; /* FIXME */
tok[tok_pos] = '\0';
+ if (!lexer->in_comment)
+ lexer->lex_mode = DATA;
return T_TI_STOP;
} else {
tok[tok_pos] = '\0';
@@ -366,24 +403,24 @@ int lexer_get_token_d_r(struct lexer * lexer, char ** _tok, int * _tok_size, int
break;
/* -- */
- case 10:
+ case STATE_T_DASHDASH:
switch (c) {
case '-':
tok[tok_pos] = c;
tok_pos++;
lexer->lexbuf_pos++;
- state = 11;
+ state = STATE_T_C_STOP;
break;
default:
tok[tok_pos] = c;
tok_pos++;
lexer->lexbuf_pos++;
- state = 100;
+ state = STATE_IDENT;
}
break;
/* --> */
- case 11:
+ case STATE_T_C_STOP:
switch (c) {
case '>':
tok[tok_pos] = c;
@@ -403,12 +440,12 @@ int lexer_get_token_d_r(struct lexer * lexer, char ** _tok, int * _tok_size, int
tok[tok_pos] = c;
tok_pos++;
lexer->lexbuf_pos++;
- state = 100;
+ state = STATE_IDENT;
}
break;
/* T_STRING (single quotes) */
- case 12:
+ case STATE_T_STRING_SINGLE:
tok[tok_pos] = c;
lexer->lexbuf_pos++;
if (c == '\'') { /* " */
@@ -419,7 +456,7 @@ int lexer_get_token_d_r(struct lexer * lexer, char ** _tok, int * _tok_size, int
break;
/* IDENT */
- case 100:
+ case STATE_IDENT:
switch (c) {
case '<':
case '>':
@@ -438,13 +475,13 @@ int lexer_get_token_d_r(struct lexer * lexer, char ** _tok, int * _tok_size, int
tok[tok_pos] = c;
tok_pos++;
lexer->lexbuf_pos++;
- state = 9;
+ state = STATE_T_TI_STOP;
break;
case '-':
tok[tok_pos] = c;
tok_pos++;
lexer->lexbuf_pos++;
- state = 10;
+ state = STATE_T_DASHDASH;
break;
default:
tok[tok_pos] = c;
@@ -456,8 +493,9 @@ int lexer_get_token_d_r(struct lexer * lexer, char ** _tok, int * _tok_size, int
lprintf("expected char \'%c\'\n", tok[tok_pos - 1]); /* FIX ME */
return T_ERROR;
}
- } else {
- /* data mode, stop if char equal '<' */
+ break;
+
+ case DATA: /* data mode, stop if char equal '<' */
switch (c)
{
case '<':
@@ -469,6 +507,28 @@ int lexer_get_token_d_r(struct lexer * lexer, char ** _tok, int * _tok_size, int
tok_pos++;
lexer->lexbuf_pos++;
}
+ break;
+
+ case CDATA: /* cdata mode, stop if next token is "]]>" */
+ switch (c)
+ {
+ case ']':
+ if (strncmp(lexer->lexbuf + lexer->lexbuf_pos, "]]>", 3) == 0) {
+ lexer->lexbuf_pos += 3;
+ lexer->lex_mode = DATA;
+ return T_CDATA_STOP;
+ } else {
+ tok[tok_pos] = c;
+ tok_pos++;
+ lexer->lexbuf_pos++;
+ }
+ break;
+ default:
+ tok[tok_pos] = c;
+ tok_pos++;
+ lexer->lexbuf_pos++;
+ }
+ break;
}
}
lprintf ("loop done tok_pos = %d, tok_size=%d, lexbuf_pos=%d, lexbuf_size=%d\n",
@@ -492,27 +552,28 @@ int lexer_get_token_d_r(struct lexer * lexer, char ** _tok, int * _tok_size, int
/* Terminate the current token */
tok[tok_pos] = '\0';
switch (state) {
- case 0:
- case 1:
- case 2:
+ case STATE_IDLE:
+ case STATE_EOL:
+ case STATE_SEPAR:
return T_EOF;
break;
- case 3:
+ case STATE_T_M_START:
return T_M_START_1;
break;
- case 4:
+ case STATE_T_M_STOP_1:
return T_M_STOP_1;
break;
- case 5:
+ case STATE_T_M_STOP_2:
return T_ERROR;
break;
- case 6:
+ case STATE_T_EQUAL:
return T_EQUAL;
break;
- case 7:
+ case STATE_T_STRING_SINGLE:
+ case STATE_T_STRING_DOUBLE:
return T_STRING;
break;
- case 100:
+ case STATE_IDENT:
return T_DATA;
break;
default: