From 698b4b9ff2091a477d77ecb4f3dd8de1fea07878 Mon Sep 17 00:00:00 2001
From: Darren Salt
Date: Tue, 6 Nov 2007 13:36:11 +0000
Subject: Decode numbered XML char entities as UTF-8, and support codes >= 256.

Ref. http://bugzilla.gnome.org/show_bug.cgi?id=484768#c12
---
 src/xine-utils/xmllexer.c | 25 ++++++++++++++++++++++---
 1 file changed, 22 insertions(+), 3 deletions(-)

(limited to 'src/xine-utils/xmllexer.c')

diff --git a/src/xine-utils/xmllexer.c b/src/xine-utils/xmllexer.c
index 754a006f9..068988eaa 100644
--- a/src/xine-utils/xmllexer.c
+++ b/src/xine-utils/xmllexer.c
@@ -39,6 +39,9 @@
 #include
 #include
 #include
+#ifdef HAVE_ICONV
+#include <iconv.h>
+#endif
 
 /* private constants*/
 #define NORMAL 0 /* normal lex mode */
@@ -463,7 +466,7 @@ char *lexer_decode_entities (const char *tok)
     {
       /* parse the character entity (on failure, treat it as literal text) */
       const char *tp = tok;
-      long i;
+      signed long i;
 
       for (i = 0; lexer_entities[i].code; ++i)
         if (!strncmp (lexer_entities[i].name, tok, lexer_entities[i].namelen)
@@ -491,7 +494,7 @@ char *lexer_decode_entities (const char *tok)
       else
         i = strtol (tp, (char **)&tp, 10);
 
-      if (i < 1 || i > 255 || *tp != ';')
+      if (*tp != ';' || i < 1)
       {
         /* out of range, or format error */
         *bp++ = '&';
@@ -499,7 +502,23 @@ char *lexer_decode_entities (const char *tok)
       }
 
       tok = tp + 1;
-      *bp++ = i;
+
+      if (i < 128)
+        /* ASCII - store as-is */
+        *bp++ = i;
+      else
+      {
+        /* Non-ASCII, so convert to UTF-8 */
+        int count = (i >= 0x04000000) ? 5 :
+                    (i >= 0x00200000) ? 4 :
+                    (i >= 0x00010000) ? 3 :
+                    (i >= 0x00000800) ? 2 : 1;
+        *bp = (char)(0x1F80 >> count);
+        count *= 6;
+        *bp++ |= i >> count;
+        while ((count -= 6) >= 0)
+          *bp++ = 128 | ((i >> count) & 0x3F);
+      }
     }
   }
   *bp = 0;
-- 
cgit v1.2.3