diff options
Diffstat (limited to 'src/xine-utils/xmllexer.c')
-rw-r--r-- | src/xine-utils/xmllexer.c | 107 |
1 files changed, 94 insertions, 13 deletions
diff --git a/src/xine-utils/xmllexer.c b/src/xine-utils/xmllexer.c index 39632938f..75a1aafec 100644 --- a/src/xine-utils/xmllexer.c +++ b/src/xine-utils/xmllexer.c @@ -26,12 +26,12 @@ */ #ifdef XINE_COMPILE -#include "xineutils.h" +#include <xine/xineutils.h> #else #define lprintf(...) #define xine_xmalloc malloc #endif -#include "xmllexer.h" +#include <xine/xmllexer.h> #include <stdio.h> #include <ctype.h> #include <string.h> @@ -40,6 +40,8 @@ #include <iconv.h> #endif +#include "bswap.h" + /* private constants*/ /* private global variables */ @@ -47,6 +49,45 @@ static const char * lexbuf; static int lexbuf_size = 0; static int lexbuf_pos = 0; static int in_comment = 0; +static char *lex_malloc = NULL; /* heap buffer owned by the lexer: assigned by lex_convert, released by lexer_init */ + +enum utf { UTF32BE, UTF32LE, UTF16BE, UTF16LE }; + /* lex_convert: transcode size bytes of UTF-16 or UTF-32 input (variant selected by utf) into a newly allocated, NUL-terminated UTF-8 string, then publish it through lexbuf and lexbuf_size and remember the allocation in lex_malloc. Conversion stops at the first zero code unit. The _X_BE_ and _X_LE_ helpers presumably read a big-endian or little-endian value at buf - confirm against their definition. NOTE(review): the malloc and realloc results are used without a NULL check. NOTE(review): each 16-bit unit is encoded on its own, surrogate pairs are not combined. */ +static void lex_convert (const char * buf, int size, enum utf utf) +{ + char *utf8 = malloc (size * (utf >= UTF16BE ? 3 : 6) + 1); /* size times 3 for the two UTF-16 variants, size times 6 for UTF-32, plus one byte for the terminator */ + char *bp = utf8; + while (size > 0) /* NOTE(review): size is never modified inside this loop, so the only exit is the zero code unit check below - confirm the input always contains one */ + { + uint32_t c = 0; + switch (utf) + { + case UTF32BE: c = _X_BE_32 (buf); buf += 4; break; + case UTF32LE: c = _X_LE_32 (buf); buf += 4; break; + case UTF16BE: c = _X_BE_16 (buf); buf += 2; break; + case UTF16LE: c = _X_LE_16 (buf); buf += 2; break; + } + if (!c) + break; /* embed a NUL, get a truncated string */ + if (c < 128) + *bp++ = c; + else + { + /* count is the number of 6-bit continuation bytes written after the lead byte; 0x1F80 shifted right by count and truncated to char gives the lead byte prefix bits */ int count = (c >= 0x04000000) ? 5 : + (c >= 0x00200000) ? 4 : + (c >= 0x00010000) ? 3 : + (c >= 0x00000800) ? 
2 : 1; + *bp = (char)(0x1F80 >> count); + count *= 6; + *bp++ |= c >> count; + while ((count -= 6) >= 0) + *bp++ = 128 | ((c >> count) & 0x3F); + } + } + *bp = 0; + lexbuf_size = bp - utf8; + lexbuf = lex_malloc = realloc (utf8, lexbuf_size + 1); +} static enum { NORMAL, @@ -55,8 +96,29 @@ static enum { } lex_mode = NORMAL; /* lexer_init: point the lexer at buf (size bytes) and reset its position and mode. A buffer left in lex_malloc by an earlier conversion is freed first. A leading byte order mark selects the handling: UTF-32 and UTF-16 input is handed to lex_convert without the mark, a UTF-8 mark is skipped in place, anything else is used unchanged. The end of this function lies outside the hunk shown here. */ void lexer_init(const char * buf, int size) { + static const char boms[] = { 0xFF, 0xFE, 0, 0, 0xFE, 0xFF }, + bom_utf8[] = { 0xEF, 0xBB, 0xBF }; /* boms is one overlapping table: bytes 0-3 are compared for UTF-32LE, bytes 2-5 for UTF-32BE, bytes 4-5 for UTF-16BE and bytes 0-1 for UTF-16LE; the 4-byte tests run first so that a UTF-32LE mark is not taken for UTF-16LE */ + + free (lex_malloc); + lex_malloc = NULL; + lexbuf = buf; lexbuf_size = size; + + if (size >= 4 && !memcmp (buf, boms + 2, 4)) + lex_convert (buf + 4, size - 4, UTF32BE); + else if (size >= 4 && !memcmp (buf, boms, 4)) + lex_convert (buf + 4, size - 4, UTF32LE); + else if (size >= 3 && !memcmp (buf, bom_utf8, 3)) + { + lexbuf += 3; + lexbuf_size -= 3; + } + else if (size >= 2 && !memcmp (buf, boms + 4, 2)) + lex_convert (buf + 2, size - 2, UTF16BE); + else if (size >= 2 && !memcmp (buf, boms, 2)) + lex_convert (buf + 2, size - 2, UTF16LE); + lexbuf_pos = 0; lex_mode = NORMAL; in_comment = 0; @@ -82,7 +144,9 @@ typedef enum { STATE_IDENT /* must be last */ } lexer_state_t; -int lexer_get_token(char * tok, int tok_size) { /* lexer_get_token_d: token reader that works on the buffer referenced by _tok, whose capacity is read from _tok_size. With fixed non-zero a full buffer yields T_ERROR; with fixed zero the buffer is doubled with realloc and the call is retried. Most of the body is outside the hunks shown here. */ +int lexer_get_token_d(char ** _tok, int * _tok_size, int fixed) { + char *tok = *_tok; + int tok_size = *_tok_size; int tok_pos = 0; lexer_state_t state = STATE_IDLE; char c; @@ -451,33 +515,44 @@ int lexer_get_token(char * tok, int tok_size) { /* pb */ if (tok_pos >= tok_size) { - lprintf("token buffer is too little\n"); + if (fixed) + return T_ERROR; + *_tok_size *= 2; + *_tok = realloc (*_tok, *_tok_size); /* NOTE(review): when realloc fails the previous pointer is overwritten with NULL and the size has already been doubled; the retry below also starts again with tok_pos at 0 - confirm that the lexer position allows a restart */ + lprintf("token buffer is too small\n"); + lprintf("increasing buffer size to %d bytes\n", *_tok_size); + if (*_tok) { + return lexer_get_token_d (_tok, _tok_size, 0); + } else { + return T_ERROR; + } } else { if (lexbuf_pos >= lexbuf_size) { /* Terminate the current token */ tok[tok_pos] = '\0'; /* input is exhausted: the state the lexer stopped in decides which token type is returned */ switch (state) { - case 0: - case 1: - case 2: + case STATE_IDLE: + case 
STATE_EOL: + case STATE_SEPAR: return T_EOF; break; - case 3: + case STATE_T_M_START: return T_M_START_1; break; - case 4: + case STATE_T_M_STOP_1: return T_M_STOP_1; break; - case 5: + case STATE_T_M_STOP_2: return T_ERROR; break; - case 6: + case STATE_T_EQUAL: return T_EQUAL; break; - case 7: + case STATE_T_STRING_SINGLE: + case STATE_T_STRING_DOUBLE: return T_STRING; break; - case 100: + case STATE_IDENT: return T_DATA; break; default: @@ -494,6 +569,12 @@ int lexer_get_token(char * tok, int tok_size) { return T_ERROR; } +/* for ABI compatibility */ /* lexer_get_token: entry point with the original signature; forwards to lexer_get_token_d with fixed set to 1, so the buffer passed in is never reallocated and an overlong token yields T_ERROR */ +int lexer_get_token (char *tok, int tok_size) +{ + return lexer_get_token_d (&tok, &tok_size, 1); +} + static struct { char code; unsigned char namelen; |