summaryrefslogtreecommitdiff
path: root/src/xine-utils/xmllexer.c
diff options
context:
space:
mode:
author: Diego 'Flameeyes' Pettenò <flameeyes@gmail.com> 2007-12-19 01:09:39 +0100
committer: Diego 'Flameeyes' Pettenò <flameeyes@gmail.com> 2007-12-19 01:09:39 +0100
commit: fedec814a0af828f9c4e97ed49cdb727e0d64ad0 (patch)
tree: 6614288d9f0937b640d0efcb757a6ea13bac7a6d /src/xine-utils/xmllexer.c
parent: 5c2f96797e1ad0c9e1e930fd86941fcec27abdc4 (diff)
parent: 64596f317f7d03ed1f3e747cd2b0664b1d4f6535 (diff)
download: xine-lib-fedec814a0af828f9c4e97ed49cdb727e0d64ad0.tar.gz
download: xine-lib-fedec814a0af828f9c4e97ed49cdb727e0d64ad0.tar.bz2
Merge from 1.2 branch.
Diffstat (limited to 'src/xine-utils/xmllexer.c')
-rw-r--r-- src/xine-utils/xmllexer.c | 62
1 files changed, 62 insertions, 0 deletions
diff --git a/src/xine-utils/xmllexer.c b/src/xine-utils/xmllexer.c
index 39632938f..bb03e5a79 100644
--- a/src/xine-utils/xmllexer.c
+++ b/src/xine-utils/xmllexer.c
@@ -40,6 +40,8 @@
#include <iconv.h>
#endif
+#include "bswap.h"
+
/* private constants*/
/* private global variables */
@@ -47,6 +49,45 @@ static const char * lexbuf;
static int lexbuf_size = 0;
static int lexbuf_pos = 0;
static int in_comment = 0;
+/* Heap buffer produced by lex_convert() (NULL when none); lexer_init()
+ * frees it and resets it to NULL before installing a new input buffer. */
+static char *lex_malloc = NULL;
+
+/* Source encodings that lex_convert() can transcode from.
+ * The enumerator order matters: lex_convert() tests `utf >= UTF16BE`
+ * to tell the 16-bit encodings apart from the 32-bit ones. */
+enum utf { UTF32BE, UTF32LE, UTF16BE, UTF16LE };
+
+/*
+ * Transcode a UTF-16/UTF-32 buffer to UTF-8 and make the result the
+ * current lexer input.
+ *
+ *   buf   source bytes in the encoding named by utf (lexer_init() passes
+ *         the data that follows the byte order mark)
+ *   size  length of buf in bytes
+ *   utf   source encoding
+ *
+ * On success lexbuf/lexbuf_size describe a NUL-terminated UTF-8 copy that
+ * is owned by lex_malloc.  Conversion stops at the first NUL code unit or
+ * as soon as fewer bytes than one whole code unit remain.  If the working
+ * buffer cannot be allocated, the lexer state is left untouched.
+ *
+ * NOTE: every 16-bit unit is converted on its own; UTF-16 surrogate pairs
+ * are not combined into a single code point.
+ */
+static void lex_convert (const char * buf, int size, enum utf utf)
+{
+  /* bytes consumed per code unit */
+  const int unit = (utf >= UTF16BE) ? 2 : 4;
+  char *utf8, *bp, *shrunk;
+
+  if (size < 0)
+    size = 0;
+
+  /* worst case: 3 output bytes per input byte (UTF-16) or 6 (UTF-32), plus NUL */
+  utf8 = malloc ((size_t)size * (utf >= UTF16BE ? 3 : 6) + 1);
+  if (!utf8)
+    return;
+  bp = utf8;
+
+  /* only read complete code units, and never past the end of buf */
+  while (size >= unit)
+  {
+    uint32_t c = 0;
+    switch (utf)
+    {
+    case UTF32BE: c = _X_BE_32 (buf); break;
+    case UTF32LE: c = _X_LE_32 (buf); break;
+    case UTF16BE: c = _X_BE_16 (buf); break;
+    case UTF16LE: c = _X_LE_16 (buf); break;
+    }
+    buf += unit;
+    size -= unit;
+
+    if (!c)
+      break; /* embed a NUL, get a truncated string */
+    if (c < 128)
+      *bp++ = c;
+    else
+    {
+      /* number of continuation bytes that follow the lead byte */
+      int count = (c >= 0x04000000) ? 5 :
+                  (c >= 0x00200000) ? 4 :
+                  (c >= 0x00010000) ? 3 :
+                  (c >= 0x00000800) ? 2 : 1;
+      /* low 8 bits of 0x1F80 >> count give the lead-byte prefix:
+       * 0xC0, 0xE0, 0xF0, 0xF8, 0xFC for count 1..5 */
+      *bp = (char)(0x1F80 >> count);
+      count *= 6;
+      *bp++ |= c >> count;
+      while ((count -= 6) >= 0)
+        *bp++ = 128 | ((c >> count) & 0x3F);
+    }
+  }
+  *bp = 0;
+  lexbuf_size = bp - utf8;
+
+  /* trim to the used length; if realloc fails the original block is still
+   * valid, so keep it instead of leaking it and leaving lexbuf NULL */
+  shrunk = realloc (utf8, lexbuf_size + 1);
+  lexbuf = lex_malloc = shrunk ? shrunk : utf8;
+}
static enum {
NORMAL,
@@ -55,8 +96,29 @@ static enum {
} lex_mode = NORMAL;
void lexer_init(const char * buf, int size) {
+ static const char boms[] = { 0xFF, 0xFE, 0, 0, 0xFE, 0xFF },
+ bom_utf8[] = { 0xEF, 0xBB, 0xBF };
+
+ free (lex_malloc);
+ lex_malloc = NULL;
+
lexbuf = buf;
lexbuf_size = size;
+
+ if (size >= 4 && !memcmp (buf, boms + 2, 4))
+ lex_convert (buf + 4, size - 4, UTF32BE);
+ else if (size >= 4 && !memcmp (buf, boms, 4))
+ lex_convert (buf + 4, size - 4, UTF32LE);
+ else if (size >= 3 && !memcmp (buf, bom_utf8, 3))
+ {
+ lexbuf += 3;
+ lexbuf_size -= 3;
+ }
+ else if (size >= 2 && !memcmp (buf, boms + 4, 2))
+ lex_convert (buf + 2, size - 2, UTF16BE);
+ else if (size >= 2 && !memcmp (buf, boms, 2))
+ lex_convert (buf + 2, size - 2, UTF16LE);
+
lexbuf_pos = 0;
lex_mode = NORMAL;
in_comment = 0;