summaryrefslogtreecommitdiff
path: root/libsi
diff options
context:
space:
mode:
author: Klaus Schmidinger <kls (at) cadsoft (dot) de>, 2007-04-22 18:00:00 +0200
committer: Klaus Schmidinger <kls (at) cadsoft (dot) de>, 2007-04-22 18:00:00 +0200
commit: a5921252942f73601b159f20b560477ec45b4ece (patch)
tree: d1fbcbe0e7526a823898676767988e984d565b8d /libsi
parent: 9f42c33ef6793482a5c2515f9a87c13c0d189c60 (diff)
downloadvdr-patch-lnbsharing-a5921252942f73601b159f20b560477ec45b4ece.tar.gz
vdr-patch-lnbsharing-a5921252942f73601b159f20b560477ec45b4ece.tar.bz2
Version 1.5.2vdr-1.5.2
- Updated the Finnish OSD texts (thanks to Rolf Ahrenberg).
- Fixed handling user activity for shutdown, which I had messed up when adopting Udo's original patch (thanks to Udo Richter).
- Added Turkish language texts (thanks to Oktay Yolgeçen).
- Added missing rules for generating the iso8859-13 font to the Makefile.
- 'libsi' now converts the incoming strings into the system's character set according to the DVB standard. The system's character set is determined from the LANG environment variable. If no recognizable setting can be found, no conversion will take place. Note that currently only the strings received from the SI data stream are converted; there have not been any changes regarding displaying UTF-8 characters on the OSD yet - this will follow in one of the next steps. With this conversion, it should now be safe to run VDR on a UTF-8 file system, because all incoming characters are converted to UTF-8. This will most likely result in wrong characters being displayed on the OSD (because UTF-8 is not known there yet), but the file names should be ok (I haven't tested this myself, though, because I don't do UTF-8 - so please be very careful when testing!).
  There's one piece of bad news here: the German pay-tv broadcaster Premiere apparently encodes all EPG strings as ISO8859-1, but fails to correctly mark these strings as such. Therefore 'libsi' (following the DVB standard) considers the strings to be encoded in the default ISO6937 and converts them to whatever the system's character set is. This, of course, results in wrong umlauts. On its old transponder, the ProSieben/SAT.1 channels also had their EPG data wrongly encoded, but apparently on the new transponder they started broadcasting on this month, they got it right.
Diffstat (limited to 'libsi')
-rw-r--r-- libsi/si.c 196
-rw-r--r-- libsi/si.h 7
2 files changed, 184 insertions, 19 deletions
diff --git a/libsi/si.c b/libsi/si.c
index a16000d..a1d2f34 100644
--- a/libsi/si.c
+++ b/libsi/si.c
@@ -6,12 +6,15 @@
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
* *
- * $Id: si.c 1.17 2007/02/03 11:45:58 kls Exp $
+ * $Id: si.c 1.18 2007/04/22 13:56:39 kls Exp $
* *
***************************************************************************/
-#include <string.h>
#include "si.h"
+#include <errno.h>
+#include <iconv.h>
+#include <malloc.h>
+#include <string.h>
#include "descriptor.h"
namespace SI {
@@ -232,7 +235,6 @@ char *String::getText(char *buffer, int size) {
return buffer;
}
-//taken from VDR, Copyright Klaus Schmidinger <kls@cadsoft.de>
char *String::getText(char *buffer, char *shortVersion, int sizeBuffer, int sizeShortVersion) {
int len=getLength();
if (len < 0 || len >= sizeBuffer) {
@@ -245,21 +247,163 @@ char *String::getText(char *buffer, char *shortVersion, int sizeBuffer, int size
return buffer;
}
-//taken from libdtv, Copyright Rolf Hakenes <hakenes@hippomi.de>
+static const char *CharacterTables1[] = { // iconv names, indexed by the one-byte table selector that may start a string
+ NULL, // 0x00
+ "ISO8859-5", // 0x01
+ "ISO8859-6", // 0x02
+ "ISO8859-7", // 0x03
+ "ISO8859-8", // 0x04
+ "ISO8859-9", // 0x05
+ "ISO8859-10", // 0x06
+ "ISO8859-11", // 0x07
+ "ISO8859-12", // 0x08 NOTE(review): ISO 8859-12 was apparently never published - check whether iconv knows this name
+ "ISO8859-13", // 0x09
+ "ISO8859-14", // 0x0A
+ "ISO8859-15", // 0x0B
+ NULL, // 0x0C
+ NULL, // 0x0D
+ NULL, // 0x0E
+ NULL, // 0x0F
+ NULL, // 0x10 (handled separately: starts a three-byte selector into CharacterTables2)
+ "UTF16", // 0x11
+ "EUC-KR", // 0x12
+ "GB2312", // 0x13
+ "GBK", // 0x14
+ "UTF8", // 0x15
+ NULL, // 0x16
+ NULL, // 0x17
+ NULL, // 0x18
+ NULL, // 0x19
+ NULL, // 0x1A
+ NULL, // 0x1B
+ NULL, // 0x1C
+ NULL, // 0x1D
+ NULL, // 0x1E
+ NULL, // 0x1F
+};
+
+#define SingleByteLimit 0x0B // CharacterTables1 entries up to this index are reported as single byte tables
+
+static const char *CharacterTables2[] = { // iconv names, indexed by the 16 bit value that follows a 0x10 selector byte
+ NULL, // 0x00
+ "ISO8859-1", // 0x01
+ "ISO8859-2", // 0x02
+ "ISO8859-3", // 0x03
+ "ISO8859-4", // 0x04
+ "ISO8859-5", // 0x05
+ "ISO8859-6", // 0x06
+ "ISO8859-7", // 0x07
+ "ISO8859-8", // 0x08
+ "ISO8859-9", // 0x09
+ "ISO8859-10", // 0x0A
+ "ISO8859-11", // 0x0B
+ NULL, // 0x0C
+ "ISO8859-13", // 0x0D
+ "ISO8859-14", // 0x0E
+ "ISO8859-15", // 0x0F
+};
+
+#define NumEntries(Table) (sizeof(Table) / sizeof(char *)) // element count of an array of char pointers (not valid for a plain pointer)
+
+// The character table decoded strings are converted to. Points to one of the
+// names in CharacterTables1/CharacterTables2; NULL means that no conversion
+// takes place.
+static const char *SystemCharacterTable = NULL;
+// False if the system character table is a CharacterTables1 entry beyond
+// SingleByteLimit, true otherwise.
+bool SystemCharacterTableIsSingleByte = true;
+
+// Returns c converted to lower case if it is an ASCII upper case letter.
+// Deliberately locale independent, since character table names are pure ASCII.
+static char lowerAscii(char c) {
+  return (c >= 'A' && c <= 'Z') ? char(c + ('a' - 'A')) : c;
+}
+
+// Returns true if the two character table names are the same when ASCII case
+// and any '-' or '_' separators are ignored, so that e.g. "utf-8" and
+// "ISO-8859-15" match the table entries "UTF8" and "ISO8859-15".
+static bool sameCharacterTableName(const char *a, const char *b) {
+  for (;;) {
+    while (*a == '-' || *a == '_')
+      a++;
+    while (*b == '-' || *b == '_')
+      b++;
+    if (lowerAscii(*a) != lowerAscii(*b))
+      return false;
+    if (!*a)
+      return true; // both names ended at the same time
+    a++;
+    b++;
+  }
+}
+
+// Sets the system character table to the entry of CharacterTables1 or
+// CharacterTables2 that matches the given name (see sameCharacterTableName()
+// for what counts as a match). CharacterTables1 is searched first.
+// A NULL CharacterTable resets to "no conversion".
+// Returns true if the table was recognized or was NULL; returns false and
+// leaves the current setting unchanged otherwise.
+bool SetSystemCharacterTable(const char *CharacterTable) {
+  if (!CharacterTable) {
+    SystemCharacterTable = NULL;
+    SystemCharacterTableIsSingleByte = true;
+    return true;
+  }
+  for (unsigned int i = 0; i < NumEntries(CharacterTables1); i++) {
+    if (CharacterTables1[i] && sameCharacterTableName(CharacterTable, CharacterTables1[i])) {
+      SystemCharacterTable = CharacterTables1[i];
+      SystemCharacterTableIsSingleByte = i <= SingleByteLimit;
+      return true;
+    }
+  }
+  for (unsigned int i = 0; i < NumEntries(CharacterTables2); i++) {
+    if (CharacterTables2[i] && sameCharacterTableName(CharacterTable, CharacterTables2[i])) {
+      SystemCharacterTable = CharacterTables2[i];
+      SystemCharacterTableIsSingleByte = true; // all CharacterTables2 entries are ISO8859 tables
+      return true;
+    }
+  }
+  return false;
+}
+
+// Looks for a character table selector at the start of 'buffer' and returns
+// the name of the table the text is encoded in. If a selector naming a known
+// table is found, 'buffer' is advanced past it and 'length' is reduced by the
+// same amount (1 byte, or 3 bytes for the 0x10 form). Otherwise both are left
+// untouched and the default "ISO6937" is returned.
+// If 'isSingleByte' is given, it is set to true for one-byte selectors up to
+// SingleByteLimit and for every table selected through 0x10, and to false in
+// all other cases (including the default).
+static const char *getCharacterTable(const unsigned char *&buffer, int &length, bool *isSingleByte = NULL) {
+  static const char *const DefaultTable = "ISO6937";
+  if (isSingleByte)
+    *isSingleByte = false;
+  if (length <= 0 || buffer[0] >= 0x20)
+    return DefaultTable; // nothing to inspect, or the text starts without a selector
+  const unsigned int selector = buffer[0];
+  const char *table = NULL;
+  int skip = 0;
+  bool single = false;
+  if (selector == 0x10) {
+    // Three-byte form: 0x10 followed by a 16 bit index into CharacterTables2.
+    if (length >= 3) {
+      const unsigned int index = (buffer[1] << 8) | buffer[2];
+      if (index < NumEntries(CharacterTables2)) {
+        table = CharacterTables2[index];
+        skip = 3;
+        single = true;
+      }
+    }
+  }
+  else if (selector < NumEntries(CharacterTables1)) {
+    // One-byte form: the selector is a direct index into CharacterTables1.
+    table = CharacterTables1[selector];
+    skip = 1;
+    single = selector <= SingleByteLimit;
+  }
+  if (!table)
+    return DefaultTable; // unknown selector (NULL table entry): don't consume anything
+  buffer += skip;
+  length -= skip;
+  if (isSingleByte)
+    *isSingleByte = single;
+  return table;
+}
+
+// Converts the string 'from' (fromLength bytes, encoded as 'fromCode') to the
+// system character table and stores the result, always 0-terminated, in 'to',
+// which has room for toLength bytes. Characters that can't be converted are
+// replaced with '?'; output that doesn't fit into 'to' is cut off.
+// Returns false, leaving 'to' untouched, if no system character table has been
+// set, if iconv doesn't provide the requested conversion, or if toLength is 0.
+static bool convertCharacterTable(const char *from, size_t fromLength, char *to, size_t toLength, const char *fromCode)
+{
+  if (!SystemCharacterTable || toLength == 0)
+    return false;
+  iconv_t cd = iconv_open(SystemCharacterTable, fromCode);
+  // iconv_open() reports failure as (iconv_t)-1; the handle is not a number
+  // that could be tested for being negative:
+  if (cd == (iconv_t)-1)
+    return false;
+  char *fromPtr = (char *)from; // iconv() wants a non-const input pointer
+  // Reserve one byte for the terminating 0, so that iconv() can never use it
+  // up and the final '*to = 0' always stays within the buffer:
+  toLength--;
+  while (fromLength > 0 && toLength > 0) {
+    if (iconv(cd, &fromPtr, &fromLength, &to, &toLength) == size_t(-1)) {
+      if (errno != EILSEQ)
+        break; // output buffer full or incomplete input: keep what we have
+      // A character can't be converted, so mark it with '?' and proceed:
+      fromPtr++;
+      fromLength--;
+      *to++ = '?';
+      toLength--;
+    }
+  }
+  *to = 0;
+  iconv_close(cd);
+  return true;
+}
+
+// originally from libdtv, Copyright Rolf Hakenes <hakenes@hippomi.de>
void String::decodeText(char *buffer, int size) {
const unsigned char *from=data.getData(0);
char *to=buffer;
-
- /* Disable detection of coding tables - libdtv doesn't do it either
- if ( (0x01 <= *from) && (*from <= 0x1f) ) {
- codeTable=*from
- }
- */
-
- if (*from == 0x10)
- from += 3; // skips code table info
-
int len=getLength();
+ if (len <= 0) {
+ *to = '\0';
+ return;
+ }
+ bool singleByte;
+ const char *cs = getCharacterTable(from, len, &singleByte);
for (int i = 0; i < len; i++) {
if (*from == 0)
break;
@@ -276,6 +420,11 @@ void String::decodeText(char *buffer, int size) {
break;
}
*to = '\0';
+ if (!singleByte || !SystemCharacterTableIsSingleByte) {
+ char convBuffer[size];
+ if (convertCharacterTable(buffer, strlen(buffer), convBuffer, sizeof(convBuffer), cs))
+ strncpy(buffer, convBuffer, strlen(convBuffer) + 1);
+ }
}
void String::decodeText(char *buffer, char *shortVersion, int sizeBuffer, int sizeShortVersion) {
@@ -283,11 +432,14 @@ void String::decodeText(char *buffer, char *shortVersion, int sizeBuffer, int si
char *to=buffer;
char *toShort=shortVersion;
int IsShortName=0;
-
- if (*from == 0x10)
- from += 3; // skips code table info
-
int len=getLength();
+ if (len <= 0) {
+ *to = '\0';
+ *toShort = '\0';
+ return;
+ }
+ bool singleByte;
+ const char *cs = getCharacterTable(from, len, &singleByte);
for (int i = 0; i < len; i++) {
if ( ((' ' <= *from) && (*from <= '~'))
|| (*from == '\n')
@@ -312,6 +464,14 @@ void String::decodeText(char *buffer, char *shortVersion, int sizeBuffer, int si
}
*to = '\0';
*toShort = '\0';
+ if (!singleByte || !SystemCharacterTableIsSingleByte) {
+ char convBuffer[sizeBuffer];
+ if (convertCharacterTable(buffer, strlen(buffer), convBuffer, sizeof(convBuffer), cs))
+ strncpy(buffer, convBuffer, strlen(convBuffer) + 1);
+ char convShortVersion[sizeShortVersion];
+ if (convertCharacterTable(shortVersion, strlen(shortVersion), convShortVersion, sizeof(convShortVersion), cs))
+ strncpy(shortVersion, convShortVersion, strlen(convShortVersion) + 1);
+ }
}
Descriptor *Descriptor::getDescriptor(CharArray da, DescriptorTagDomain domain, bool returnUnimplemetedDescriptor) {
diff --git a/libsi/si.h b/libsi/si.h
index eb01609..a156d76 100644
--- a/libsi/si.h
+++ b/libsi/si.h
@@ -6,7 +6,7 @@
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
* *
- * $Id: si.h 1.16 2007/02/03 11:47:25 kls Exp $
+ * $Id: si.h 1.17 2007/04/22 13:32:09 kls Exp $
* *
***************************************************************************/
@@ -486,6 +486,11 @@ protected:
void decodeText(char *buffer, char *shortVersion, int sizeBuffer, int sizeShortVersion);
};
+// Call this function to set the system character table. CharacterTable is a string
+// like "iso8859-15" or "utf8" (case insensitive). Passing NULL turns conversion off.
+// Returns true if the character table was recognized (or was NULL).
+bool SetSystemCharacterTable(const char *CharacterTable);
+
} //end of namespace
#endif //LIBSI_SI_H