diff options
author | mrwastl <mrwastl@users.sourceforge.net> | 2011-06-03 18:27:37 +0200 |
---|---|---|
committer | mrwastl <mrwastl@users.sourceforge.net> | 2011-06-03 18:27:37 +0200 |
commit | 3ce10a1e896258a73e30ad7a17b99a82c286f8c3 (patch) | |
tree | b44c8eabf7cbd59f5d1667602f1c9f5e47ff67dc /glcdgraphics/common.c | |
parent | 90c80f6f57e45780d64c63908ca7813290bdd9ad (diff) | |
download | graphlcd-base-3ce10a1e896258a73e30ad7a17b99a82c286f8c3.tar.gz graphlcd-base-3ce10a1e896258a73e30ad7a17b99a82c286f8c3.tar.bz2 |
move EncodedCharAdjustCounter() to common.[ch] and rename it to encodedCharAdjustCounter()
Diffstat (limited to 'glcdgraphics/common.c')
-rw-r--r-- | glcdgraphics/common.c | 57 |
1 files changed, 57 insertions, 0 deletions
// Reconstructed from the diff of glcdgraphics/common.c (index 8942424..7bfcc74,
// hunk @@ -57,4 +57,61 @@). The hunk's leading context is the tail of trim()
// ("return s.substr(start, end - start + 1); }") and its trailing context is
// the "} // end of namespace" line; both belong to definitions that are not
// fully visible here and are therefore only mentioned, not repeated as code.

// character to return when erroneous utf-8 sequence (for now: '_')
#define UTF8_ERRCODE 0x005F

/**
 * Decode the character that starts at byte offset i of str.
 *
 * @param isutf8  true: interpret str as UTF-8; false: every byte is one character.
 * @param str     the encoded text.
 * @param c       receives the decoded code point, or UTF8_ERRCODE if the bytes
 *                at i do not form a structurally valid UTF-8 sequence.
 *                Left untouched when i is out of range.
 * @param i       in: offset of the first byte of the character.
 *                out: offset of the LAST byte that was consumed, so a caller
 *                that increments i afterwards lands on the next character.
 *                i never moves to or beyond str.length().
 *
 * For a malformed or truncated multi-byte sequence only the lead byte and the
 * valid continuation bytes directly following it are consumed; the offending
 * byte is left in place so that it is decoded on its own by the next call
 * instead of being silently swallowed.
 *
 * Only the bit structure of the sequence is checked; overlong encodings,
 * surrogates and values above U+10FFFF are passed through as decoded.
 */
void encodedCharAdjustCounter(const bool isutf8, const std::string & str, uint32_t & c, unsigned int & i)
{
    const std::string::size_type len = str.length();
    if (i >= len)
        return;

    // Go through uint8_t so bytes >= 0x80 are not sign-extended where plain
    // char is signed.
    const uint8_t lead = static_cast<uint8_t>(str[i]);

    if (!isutf8) {
        c = lead;
        return;
    }

    unsigned int expected = 0;  // number of continuation bytes announced by the lead byte
    uint32_t value = 0;         // payload bits collected so far

    if ((lead & 0x80) == 0x00) {
        // one byte: 0xxxxxxx
        expected = 0;
        value = lead;
    } else if ((lead & 0xE0) == 0xC0) {
        // two byte utf8: 110yyyyy 10xxxxxx -> 00000yyy yyxxxxxx
        expected = 1;
        value = lead & 0x1F;
    } else if ((lead & 0xF0) == 0xE0) {
        // three byte utf8: 1110zzzz 10yyyyyy 10xxxxxx -> zzzzyyyy yyxxxxxx
        expected = 2;
        value = lead & 0x0F;
    } else if ((lead & 0xF8) == 0xF0) {
        // four byte utf8: 11110www 10zzzzzz 10yyyyyy 10xxxxxx -> 000wwwzz zzzzyyyy yyxxxxxx
        expected = 3;
        value = lead & 0x07;
    } else {
        // stray continuation byte (10xxxxxx) or 11111xxx: not a valid lead byte
        c = UTF8_ERRCODE;
        return;
    }

    // Collect continuation bytes; stop at the first missing or malformed one.
    unsigned int consumed = 0;
    while (consumed < expected) {
        const std::string::size_type pos = static_cast<std::string::size_type>(i) + consumed + 1;
        if (pos >= len)
            break;  // sequence is truncated by the end of the string
        const uint8_t next = static_cast<uint8_t>(str[pos]);
        if ((next & 0xC0) != 0x80)
            break;  // not a continuation byte: leave it for the next call
        value = (value << 6) | (next & 0x3F);
        ++consumed;
    }

    c = (consumed == expected) ? value : static_cast<uint32_t>(UTF8_ERRCODE);
    i += consumed;
}