diff options
author | Jochen Dolze <vdr@dolze.de> | 2011-11-27 18:56:20 +0100 |
---|---|---|
committer | Jochen Dolze <vdr@dolze.de> | 2011-11-27 18:56:20 +0100 |
commit | a6acfd6f9ad4227008667ff269bfa7ce2185f28b (patch) | |
tree | 59a5f7cd28b72abe0032a525be1178b0c37c24db /dist | |
parent | 3594e6b915e4078b8ec20f1cc9caf9082a460067 (diff) | |
download | vdr-plugin-xmltv2vdr-a6acfd6f9ad4227008667ff269bfa7ce2185f28b.tar.gz vdr-plugin-xmltv2vdr-a6acfd6f9ad4227008667ff269bfa7ce2185f28b.tar.bz2 |
epgdata2xmltv now handles wrong charset and encoding (only sort of!) v0.0.2
added eplist support
Diffstat (limited to 'dist')
-rw-r--r-- | dist/epgdata2xmltv/Makefile | 4 | ||||
-rw-r--r-- | dist/epgdata2xmltv/epgdata2xmltv.cpp | 38 | ||||
-rw-r--r-- | dist/epgdata2xmltv/epgdata2xmltv.h | 3 |
3 files changed, 35 insertions, 10 deletions
diff --git a/dist/epgdata2xmltv/Makefile b/dist/epgdata2xmltv/Makefile index 2ff7f4e..e13b84d 100644 --- a/dist/epgdata2xmltv/Makefile +++ b/dist/epgdata2xmltv/Makefile @@ -11,8 +11,8 @@ STRIP ?= strip ### Includes and Defines (add further entries here): -PKG-LIBS += libxml-2.0 libxslt libexslt libcurl libzip -PKG-INCLUDES += libxml-2.0 libxslt libexslt libcurl libzip +PKG-LIBS += libxml-2.0 libxslt libexslt libcurl libzip libpcrecpp enca +PKG-INCLUDES += libxml-2.0 libxslt libexslt libcurl libzip libpcrecpp enca DEFINES += -D_GNU_SOURCE DEFINES += -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE diff --git a/dist/epgdata2xmltv/epgdata2xmltv.cpp b/dist/epgdata2xmltv/epgdata2xmltv.cpp index f432c07..7515cb0 100644 --- a/dist/epgdata2xmltv/epgdata2xmltv.cpp +++ b/dist/epgdata2xmltv/epgdata2xmltv.cpp @@ -8,6 +8,8 @@ #include <string.h> #include <locale.h> #include <zip.h> +#include <pcrecpp.h> +#include <enca.h> #include <libxml/parserInternals.h> #include "epgdata2xmltv.h" #include "epgdata2xmltv_xsl.h" @@ -381,7 +383,6 @@ int cepgdata2xmltv::Process(int argc, char *argv[]) break; } dtdmem[size]=0; - dtdmem=strreplace(dtdmem,"ISO-8859-1","Windows-1252"); zip_fclose(zfile); int entries=zip_get_num_files(zip); @@ -484,11 +485,38 @@ int cepgdata2xmltv::Process(int argc, char *argv[]) xmlDocPtr pxmlDoc; if (!pxsltStylesheet) LoadXSLT(); - if ((pxmlDoc=xmlParseMemory(xmlmem,strlen(xmlmem)))==NULL) + int xmlsize=strlen(xmlmem); + if ((pxmlDoc=xmlParseMemory(xmlmem,xmlsize))==NULL) { - esyslog("failed parsing xml"); - free(xmlmem); - continue; + EncaAnalyser analyser=enca_analyser_alloc("__"); + if (analyser) { + EncaEncoding encoding=enca_analyse_const(analyser, (unsigned char *) xmlmem,xmlsize); + const char *cs=enca_charset_name(encoding.charset, ENCA_NAME_STYLE_ICONV); + if (cs) { + if (!strcmp(cs,"UTF-8")) { + xmlmem=strreplace(xmlmem,"Windows-1252","UTF-8"); + } else { + esyslog("enca returned %s, please report!",cs); + } + } + 
enca_analyser_free(analyser); + } + + string s = xmlmem; + int reps=pcrecpp::RE("&(?![a-zA-Z]{1,8};)").GlobalReplace("%amp;",&s); + if (reps) { + xmlmem = (char *)realloc(xmlmem, s.size()+1); + xmlsize = s.size(); + strcpy(xmlmem,s.c_str()); + } + + if ((pxmlDoc=xmlParseMemory(xmlmem,xmlsize))==NULL) + { + esyslog("failed parsing xml"); + free(xmlmem); + xmlmem=NULL; + continue; + } } for (;;) diff --git a/dist/epgdata2xmltv/epgdata2xmltv.h b/dist/epgdata2xmltv/epgdata2xmltv.h index c49794d..818e883 100644 --- a/dist/epgdata2xmltv/epgdata2xmltv.h +++ b/dist/epgdata2xmltv/epgdata2xmltv.h @@ -49,9 +49,6 @@ private: int DownloadData(const char *url); bool Translate(xmlDocPtr pxmlDoc, const char **params); void LoadXSLT(); -/* xmlParserInputPtr xmlMyExternalEntityLoader(const char *URL, - const char *ID, - xmlParserCtxtPtr ctxt); */ public: cepgdata2xmltv(); ~cepgdata2xmltv(); |