summary refs log tree commit diff
path: root/dist
diff options
context:
space:
mode:
author Jochen Dolze <vdr@dolze.de> 2011-11-27 18:56:20 +0100
committer Jochen Dolze <vdr@dolze.de> 2011-11-27 18:56:20 +0100
commit a6acfd6f9ad4227008667ff269bfa7ce2185f28b (patch)
tree 59a5f7cd28b72abe0032a525be1178b0c37c24db /dist
parent 3594e6b915e4078b8ec20f1cc9caf9082a460067 (diff)
download vdr-plugin-xmltv2vdr-a6acfd6f9ad4227008667ff269bfa7ce2185f28b.tar.gz
vdr-plugin-xmltv2vdr-a6acfd6f9ad4227008667ff269bfa7ce2185f28b.tar.bz2
epgdata2xmltv now handles wrong charset and encoding (only sort of!) v0.0.2
added eplist support
Diffstat (limited to 'dist')
-rw-r--r-- dist/epgdata2xmltv/Makefile 4
-rw-r--r-- dist/epgdata2xmltv/epgdata2xmltv.cpp 38
-rw-r--r-- dist/epgdata2xmltv/epgdata2xmltv.h 3
3 files changed, 35 insertions, 10 deletions
diff --git a/dist/epgdata2xmltv/Makefile b/dist/epgdata2xmltv/Makefile
index 2ff7f4e..e13b84d 100644
--- a/dist/epgdata2xmltv/Makefile
+++ b/dist/epgdata2xmltv/Makefile
@@ -11,8 +11,8 @@ STRIP ?= strip
### Includes and Defines (add further entries here):
-PKG-LIBS += libxml-2.0 libxslt libexslt libcurl libzip
-PKG-INCLUDES += libxml-2.0 libxslt libexslt libcurl libzip
+PKG-LIBS += libxml-2.0 libxslt libexslt libcurl libzip libpcrecpp enca
+PKG-INCLUDES += libxml-2.0 libxslt libexslt libcurl libzip libpcrecpp enca
DEFINES += -D_GNU_SOURCE
DEFINES += -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE
diff --git a/dist/epgdata2xmltv/epgdata2xmltv.cpp b/dist/epgdata2xmltv/epgdata2xmltv.cpp
index f432c07..7515cb0 100644
--- a/dist/epgdata2xmltv/epgdata2xmltv.cpp
+++ b/dist/epgdata2xmltv/epgdata2xmltv.cpp
@@ -8,6 +8,8 @@
#include <string.h>
#include <locale.h>
#include <zip.h>
+#include <pcrecpp.h>
+#include <enca.h>
#include <libxml/parserInternals.h>
#include "epgdata2xmltv.h"
#include "epgdata2xmltv_xsl.h"
@@ -381,7 +383,6 @@ int cepgdata2xmltv::Process(int argc, char *argv[])
break;
}
dtdmem[size]=0;
- dtdmem=strreplace(dtdmem,"ISO-8859-1","Windows-1252");
zip_fclose(zfile);
int entries=zip_get_num_files(zip);
@@ -484,11 +485,38 @@ int cepgdata2xmltv::Process(int argc, char *argv[])
xmlDocPtr pxmlDoc;
if (!pxsltStylesheet) LoadXSLT();
- if ((pxmlDoc=xmlParseMemory(xmlmem,strlen(xmlmem)))==NULL)
+ int xmlsize=strlen(xmlmem);
+ if ((pxmlDoc=xmlParseMemory(xmlmem,xmlsize))==NULL)
{
- esyslog("failed parsing xml");
- free(xmlmem);
- continue;
+ EncaAnalyser analyser=enca_analyser_alloc("__");
+ if (analyser) {
+ EncaEncoding encoding=enca_analyse_const(analyser, (unsigned char *) xmlmem,xmlsize);
+ const char *cs=enca_charset_name(encoding.charset, ENCA_NAME_STYLE_ICONV);
+ if (cs) {
+ if (!strcmp(cs,"UTF-8")) {
+ xmlmem=strreplace(xmlmem,"Windows-1252","UTF-8");
+ } else {
+ esyslog("enca returned %s, please report!",cs);
+ }
+ }
+ enca_analyser_free(analyser);
+ }
+
+ string s = xmlmem;
+ int reps=pcrecpp::RE("&(?![a-zA-Z]{1,8};)").GlobalReplace("%amp;",&s);
+ if (reps) {
+ xmlmem = (char *)realloc(xmlmem, s.size()+1);
+ xmlsize = s.size();
+ strcpy(xmlmem,s.c_str());
+ }
+
+ if ((pxmlDoc=xmlParseMemory(xmlmem,xmlsize))==NULL)
+ {
+ esyslog("failed parsing xml");
+ free(xmlmem);
+ xmlmem=NULL;
+ continue;
+ }
}
for (;;)
diff --git a/dist/epgdata2xmltv/epgdata2xmltv.h b/dist/epgdata2xmltv/epgdata2xmltv.h
index c49794d..818e883 100644
--- a/dist/epgdata2xmltv/epgdata2xmltv.h
+++ b/dist/epgdata2xmltv/epgdata2xmltv.h
@@ -49,9 +49,6 @@ private:
int DownloadData(const char *url);
bool Translate(xmlDocPtr pxmlDoc, const char **params);
void LoadXSLT();
-/* xmlParserInputPtr xmlMyExternalEntityLoader(const char *URL,
- const char *ID,
- xmlParserCtxtPtr ctxt); */
public:
cepgdata2xmltv();
~cepgdata2xmltv();