summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- glcdgraphics/common.c 35
-rw-r--r-- glcdgraphics/common.h 8
-rw-r--r-- glcdskin/parser.c 2
-rw-r--r-- glcdskin/xml.c 96
-rw-r--r-- glcdskin/xml.h 13
5 files changed, 106 insertions, 48 deletions
diff --git a/glcdgraphics/common.c b/glcdgraphics/common.c
index 8d6e8f6..1c78e23 100644
--- a/glcdgraphics/common.c
+++ b/glcdgraphics/common.c
@@ -59,15 +59,12 @@ std::string trim(const std::string & s)
}
-// character to return when erraneous utf-8 sequence (for now: space)
-#define UTF8_ERRCODE 0x0020
-// for debugging issues return '_' instead:
-//#define UTF8_ERRCODE 0x005F
-
-void encodedCharAdjustCounter(const bool isutf8, const std::string & str, uint32_t & c, unsigned int & i)
+bool encodedCharAdjustCounter(const bool isutf8, const std::string & str, uint32_t & c, unsigned int & i, const uint32_t errChar)
{
+ bool rv = false;
+
if (i >= str.length())
- return;
+ return rv;
if ( isutf8 ) {
uint8_t c0,c1,c2,c3;
@@ -80,41 +77,47 @@ void encodedCharAdjustCounter(const bool isutf8, const std::string & str, uint32
if ( (c0 & 0x80) == 0x00) {
// one byte: 0xxxxxxx
c = c0;
+ rv = true;
} else if ( (c0 & 0xE0) == 0xC0 ) {
// two byte utf8: 110yyyyy 10xxxxxx -> 00000yyy yyxxxxxx
if ( (c1 & 0xC0) == 0x80 ) {
c = ( (c0 & 0x1F) << 6 ) | ( (c1 & 0x3F) );
+ rv = true;
} else {
- //syslog(LOG_INFO, "GraphLCD: illegal 2-byte UTF-8 sequence found: 0x%02x 0x%02x\n", c0, c1);
- c = UTF8_ERRCODE;
+ //syslog(LOG_INFO, "GraphLCD: illegal 2-byte UTF-8 sequence found: %02x %02x, pos=%d, str: %s\n", c0,c1,i,str.c_str());
+ c = errChar;
}
- i += 1;
+ i += 1;
} else if ( (c0 & 0xF0) == 0xE0 ) {
// three byte utf8: 1110zzzz 10yyyyyy 10xxxxxx -> zzzzyyyy yyxxxxxx
if ( ((c1 & 0xC0) == 0x80) && ((c2 & 0xC0) == 0x80) ) {
c = ( (c0 & 0x0F) << 12 ) | ( (c1 & 0x3F) << 6 ) | ( c2 & 0x3F );
+ rv = true;
} else {
- //syslog(LOG_INFO, "GraphLCD: illegal 3-byte UTF-8 sequence found: 0x%02x 0x%02x 0x%02x\n", c0, c1, c2);
- c = UTF8_ERRCODE;
+ //syslog(LOG_INFO, "GraphLCD: illegal 3-byte UTF-8 sequence found: %02x %02x %02x, pos=%d, str: %s\n", c0,c1,c2,i,str.c_str());
+ c = errChar;
}
i += 2;
} else if ( (c0 & 0xF8) == 0xF0 ) {
// four byte utf8: 11110www 10zzzzzz 10yyyyyy 10xxxxxx -> 000wwwzz zzzzyyyy yyxxxxxx
if ( ((c1 & 0xC0) == 0x80) && ((c2 & 0xC0) == 0x80) && ((c3 & 0xC0) == 0x80) ) {
c = ( (c0 & 0x07) << 18 ) | ( (c1 & 0x3F) << 12 ) | ( (c2 & 0x3F) << 6 ) | (c3 & 0x3F);
+ rv = true;
} else {
- //syslog(LOG_INFO, "GraphLCD: illegal 4-byte UTF-8 sequence found: 0x%02x 0x%02x 0x%02x 0x%02x\n", c0, c1, c2, c3);
- c = UTF8_ERRCODE;
+ //syslog(LOG_INFO, "GraphLCD: illegal 4-byte UTF-8 sequence found: %02x %02x %02x %02x, pos=%d, str: %s\n", c0,c1,c2,c3,i,str.c_str());
+ c = errChar;
}
i += 3;
} else {
// 1xxxxxxx is invalid!
- //syslog(LOG_INFO, "GraphLCD: illegal 1-byte UTF-8 char found: 0x%02x\n", c0);
- c = UTF8_ERRCODE;
+ //syslog(LOG_INFO, "GraphLCD: illegal 1-byte UTF-8 char found: %02x, pos=%d, str: %s\n", c0,i,str.c_str());
+ c = errChar;
}
} else {
c = str[i];
+ rv = true;
}
+ return rv;
}
} // end of namespace
diff --git a/glcdgraphics/common.h b/glcdgraphics/common.h
index 0043390..f0376bf 100644
--- a/glcdgraphics/common.h
+++ b/glcdgraphics/common.h
@@ -15,13 +15,19 @@
#include <string>
#include <stdint.h>
+// character to return on an erroneous utf-8 sequence (for now: space)
+//#define UTF8_ERRCHAR 0x0020
+// for debugging issues return '_' instead:
+#define UTF8_ERRCHAR 0x005F
+
+
namespace GLCD
{
void clip(int & value, int min, int max);
void sort(int & value1, int & value2);
std::string trim(const std::string & s);
-void encodedCharAdjustCounter(const bool isutf8, const std::string & str, uint32_t & c, unsigned int & i);
+bool encodedCharAdjustCounter(const bool isutf8, const std::string & str, uint32_t & c, unsigned int & i, const uint32_t errChar = UTF8_ERRCHAR);
} // end of namespace
diff --git a/glcdskin/parser.c b/glcdskin/parser.c
index 0fc9689..d64f1c6 100644
--- a/glcdskin/parser.c
+++ b/glcdskin/parser.c
@@ -448,7 +448,7 @@ cSkin * XmlParse(cSkinConfig & Config, const std::string & Name, const std::stri
skin = new cSkin(Config, Name);
context.clear();
- cXML xml(fileName);
+ cXML xml(fileName, skin->Config().CharSet());
xml.SetNodeStartCB(StartElem);
xml.SetNodeEndCB(EndElem);
xml.SetCDataCB(CharData);
diff --git a/glcdskin/xml.c b/glcdskin/xml.c
index ed29d11..f36d7bb 100644
--- a/glcdskin/xml.c
+++ b/glcdskin/xml.c
@@ -15,32 +15,14 @@
#include <iostream>
#include <fstream>
+#include <string.h>
+
#include "xml.h"
+#include "../glcdgraphics/common.h"
namespace GLCD
{
-std::string trim(const std::string & s)
-{
- std::string::size_type start, end;
-
- start = 0;
- while (start < s.length())
- {
- if (!isspace(s[start]))
- break;
- start++;
- }
- end = s.length() - 1;
- while (end >= 0)
- {
- if (!isspace(s[end]))
- break;
- end--;
- }
- return s.substr(start, end - start + 1);
-}
-
enum {
LOOK4START, // looking for first element start
LOOK4TAG, // looking for element tag
@@ -57,7 +39,7 @@ enum {
INCLOSETAG, // reading closing tag
};
-cXML::cXML(const std::string & file)
+cXML::cXML(const std::string & file, const std::string sysCharset)
: nodestartcb(NULL),
nodeendcb(NULL),
cdatacb(NULL),
@@ -66,6 +48,18 @@ cXML::cXML(const std::string & file)
{
char * buffer;
long size;
+ sysEncoding = sysCharset;
+ sysIsUTF8 = (sysEncoding == "UTF-8");
+ if (!sysIsUTF8) {
+ // convert from utf-8 to system encoding
+ iconv_cd = iconv_open(sysEncoding.c_str(), "UTF-8");
+ if (iconv_cd == (iconv_t) -1) {
+ syslog(LOG_ERR, "ERROR: system encoding %s is not supported\n", sysEncoding.c_str());
+ iconv_cd = NULL;
+ }
+ } else {
+ iconv_cd = NULL;
+ }
#if (__GNUC__ < 3)
std::ifstream f(file.c_str(), std::ios::in | std::ios::binary | std::ios::ate);
@@ -89,6 +83,7 @@ cXML::cXML(const std::string & file)
delete[] buffer;
}
+#if 0
cXML::cXML(const char * mem, unsigned int len)
: nodestartcb(NULL),
nodeendcb(NULL),
@@ -98,6 +93,13 @@ cXML::cXML(const char * mem, unsigned int len)
{
data.assign(mem, len);
}
+#endif
+
+cXML::~cXML()
+{
+ if (iconv_cd != NULL)
+ iconv_close(iconv_cd);
+}
void cXML::SetNodeStartCB(XML_NODE_START_CB(cb))
{
@@ -129,14 +131,26 @@ int cXML::Parse(void)
int percent = 0;
int last = 0;
std::string::size_type len;
+ uint32_t c, c_tmp;
+ unsigned int i_old;
+ int l, char_size;
state = LOOK4START;
linenr = 1;
skipping = false;
len = data.length();
- for (std::string::size_type i = 0; i < len; i++)
+
+ unsigned int i = 0;
+ while (i < (unsigned int)len)
{
- if (ReadChar(data[i]) != 0)
+ i_old = i;
+ encodedCharAdjustCounter(true, data, c_tmp, i);
+ char_size = (i - i_old) + 1;
+ c = 0;
+ for (l = 0 ; l < char_size; l++)
+ c += ( (0xFF & data[i_old + l]) << ( l << 3) );
+
+ if (ReadChar(c /*data[i]*/, char_size) != 0)
return -1;
if (progresscb)
{
@@ -147,6 +161,7 @@ int cXML::Parse(void)
last = percent;
}
}
+ i++;
}
return 0;
}
@@ -156,8 +171,15 @@ bool cXML::IsTokenChar(bool start, int c)
return isalpha(c) || c == '_' || (!start && isdigit(c));
}
-int cXML::ReadChar(int c)
+int cXML::ReadChar(unsigned int c, int char_size)
{
+ // buffer for conversions (when conversion from utf8 to system encoding is required)
+ char convbufin[5];
+ char convbufout[5];
+ char* convbufinp = convbufin;
+ char* convbufoutp = convbufout;
+ size_t bufin_size, bufout_size, bufconverted;
+
// new line?
if (c == '\n')
linenr++;
@@ -190,7 +212,29 @@ int cXML::ReadChar(int c)
state = LOOK4TAG;
}
else
- cdata += c;
+ {
+ int i;
+ //cdata += c;
+ // convert text-data on the fly if system encoding != UTF-8
+ if (iconv_cd != NULL && char_size > 1 /* ((c & 0x80) == 0x80)*/) {
+ for (i = 0; i < char_size; i++)
+ convbufin[i] = ( (char)((c >> ( i << 3) ) & 0xFF) );
+ convbufin[char_size] = '\0';
+ bufin_size = strlen(convbufin);
+ bufout_size = bufin_size;
+ bufconverted = iconv(iconv_cd, &convbufinp, &bufin_size, &convbufoutp, &bufout_size);
+
+ if (bufconverted != (size_t)-1 && strlen(convbufout) != 0) {
+ for (i = 0; i < (int)strlen(convbufout); i++)
+ cdata += convbufout[i];
+ } else {
+ cdata += "?";
+ }
+ } else {
+ for (i = 0; i < char_size; i++)
+ cdata += ( (unsigned char)((c >> ( i << 3) ) & 0xFF) );
+ }
+ }
// silently ignore until resync
break;
diff --git a/glcdskin/xml.h b/glcdskin/xml.h
index 009679c..30210cc 100644
--- a/glcdskin/xml.h
+++ b/glcdskin/xml.h
@@ -15,6 +15,7 @@
#include <string>
#include <map>
+#include <iconv.h>
namespace GLCD
{
@@ -36,7 +37,10 @@ private:
bool skipping;
int state;
int linenr;
- int delim;
+ unsigned int delim;
+ std::string sysEncoding;
+ bool sysIsUTF8;
+ iconv_t iconv_cd;
std::string data, cdata, tag, attrn, attrv;
std::map<std::string, std::string> attr;
@@ -49,11 +53,12 @@ private:
protected:
bool IsTokenChar(bool start, int c);
- int ReadChar(int c);
+ int ReadChar(unsigned int c, int char_size);
public:
- cXML(const std::string & file);
- cXML(const char * mem, unsigned int len);
+ cXML(const std::string & file, const std::string sysCharset = "UTF-8");
+ //cXML(const char * mem, unsigned int len);
+ ~cXML();
void SetNodeStartCB(XML_NODE_START_CB(cb));
void SetNodeEndCB(XML_NODE_END_CB(cb));