Initial Push Version 0.0.1

author: louis <louis.braun@gmx.de> 2013-08-23 17:11:38 +0200
committer: louis <louis.braun@gmx.de> 2013-08-23 17:11:38 +0200
commit: 21c1ad1005e3415f8448a21f17ed3ba3be9715b8 (patch)
tree: e8cae7353b6047d1d1bd104ad54e5b15d0c85260 /tools
download: vdr-plugin-tvscraper-21c1ad1005e3415f8448a21f17ed3ba3be9715b8.tar.gz
vdr-plugin-tvscraper-21c1ad1005e3415f8448a21f17ed3ba3be9715b8.tar.bz2
5 files changed, 426 insertions, 0 deletions
diff --git a/tools/curlfuncs.cpp b/tools/curlfuncs.cpp
new file mode 100644
index 0000000..de4c72f
--- /dev/null
+++ b/tools/curlfuncs.cpp
@@ -0,0 +1,235 @@
+/*
+Copyright (c) 2002, Mayukh Bose
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+* Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.  
+
+* Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+* Neither the name of Mayukh Bose nor the names of other
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/*
+  Change History:
+  11/23/2004 - Removed the #include <unistd.h> line because I didn't 
+               need it. Wonder why I had it there in the first place :).
+  10/20/2004 - Publicly released this code. 
+*/
+#include <string>
+#include <cstdio>
+#include <curl/curl.h>
+#include <curl/easy.h>
+#include "curlfuncs.h"
+
+#ifndef TRUE
+#define TRUE 1
+#endif
+
+using namespace std;
+
+/* Local function prototypes.
+ * CurlDoPost performs the actual transfer for CurlPostUrl and CurlPostRaw;
+ * it is declared here because its definition follows both of them.
+ */
+int CurlDoPost(const char *url, string *sOutput, const string &sReferer,
+	       struct curl_httppost *formpost, struct curl_slist *headerlist);
+/*
+ * State shared by every wrapper in this file. All requests go through the
+ * one easy handle below, and in-memory responses are collected in sBuf.
+ * NOTE(review): no locking is visible in this file, so concurrent use from
+ * several threads looks unsafe - confirm against the callers.
+ */
+namespace curlfuncs {
+  string sBuf;                // response body gathered by collect_data when no FILE* is set
+  bool bInitialized = false;  // set by InitCurlLibraryIfNeeded, cleared by FreeCurlLibrary
+  CURL *curl = NULL;          // easy handle created by InitCurlLibraryIfNeeded
+}
+
+/*
+ * Write callback handed to libcurl through CURLOPT_WRITEFUNCTION.
+ *
+ * ptr/size/nmemb describe the received chunk (size * nmemb bytes).
+ * stream is the CURLOPT_WRITEDATA value: NULL means "append the chunk to
+ * curlfuncs::sBuf", anything else is treated as a FILE* to write to.
+ *
+ * Returns the number of bytes consumed. libcurl treats a value different
+ * from size * nmemb as a write error and aborts the transfer, so a short
+ * fwrite() is reported instead of being silently ignored.
+ */
+size_t collect_data(void *ptr, size_t size, size_t nmemb, void *stream)
+{
+  const size_t actualsize = size * nmemb;
+
+  if (stream == NULL) {
+    // In-memory transfer: append straight to the shared buffer.
+    curlfuncs::sBuf.append(static_cast<const char *>(ptr), actualsize);
+    return actualsize;
+  }
+
+  // File transfer: fwrite() returns the number of complete items written.
+  const size_t written = fwrite(ptr, size, nmemb, static_cast<FILE *>(stream));
+  return written * size;
+}
+
+/*
+ * Performs the one-time libcurl setup on first use: global init, creation
+ * of the shared easy handle and the options common to every request.
+ * Does nothing when curlfuncs::bInitialized is already set.
+ *
+ * Throws a std::string if the easy handle cannot be created.
+ *
+ * curl_easy_setopt() is a varargs function, so numeric options are passed
+ * as long (1L) and pointer options as real pointers; passing a plain int
+ * is undefined behaviour on platforms where int and long differ in size.
+ */
+inline void InitCurlLibraryIfNeeded()
+{
+  if (curlfuncs::bInitialized)
+    return;
+
+  curl_global_init(CURL_GLOBAL_ALL);
+  curlfuncs::curl = curl_easy_init();
+  if (!curlfuncs::curl) {
+    // Balance the global init above so that a later retry starts clean.
+    curl_global_cleanup();
+    throw string("Could not create new curl instance");
+  }
+
+  curl_easy_setopt(curlfuncs::curl, CURLOPT_NOPROGRESS, 1L);                       // Do not show progress
+  curl_easy_setopt(curlfuncs::curl, CURLOPT_WRITEFUNCTION, collect_data);
+  curl_easy_setopt(curlfuncs::curl, CURLOPT_WRITEDATA, static_cast<void *>(NULL)); // NULL => collect_data writes to the string buffer
+  curl_easy_setopt(curlfuncs::curl, CURLOPT_FOLLOWLOCATION, 1L);
+  curl_easy_setopt(curlfuncs::curl, CURLOPT_USERAGENT, "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; Mayukh's libcurl wrapper http://www.mayukhbose.com/)");
+  curlfuncs::bInitialized = true;
+}
+
+/*
+ * Fetches url with an HTTP GET and stores the response body in *sOutput.
+ *
+ * url      - address to request.
+ * sOutput  - receives the body on success and is set to "" on a transfer
+ *            error; must not be NULL (NULL makes the call fail with 0).
+ * sReferer - sent as the Referer header when non-empty.
+ *
+ * Returns 1 on success, 0 on failure.
+ */
+int CurlGetUrl(const char *url, string *sOutput, const string &sReferer)
+{
+  if (sOutput == NULL)
+    return 0;
+
+  InitCurlLibraryIfNeeded();
+
+  curl_easy_setopt(curlfuncs::curl, CURLOPT_URL, url);                              // Set the URL to get
+  if (!sReferer.empty())
+    curl_easy_setopt(curlfuncs::curl, CURLOPT_REFERER, sReferer.c_str());
+  // Numeric options must be long and pointer options real pointers (varargs).
+  curl_easy_setopt(curlfuncs::curl, CURLOPT_HTTPGET, 1L);
+  curl_easy_setopt(curlfuncs::curl, CURLOPT_WRITEDATA, static_cast<void *>(NULL));  // Set option to write to string
+
+  curlfuncs::sBuf = "";
+  if (curl_easy_perform(curlfuncs::curl) != CURLE_OK) {
+    // Transfer failed: hand back an empty body.
+    *sOutput = "";
+    return 0;
+  }
+
+  *sOutput = curlfuncs::sBuf;
+  return 1;
+}
+
+/*
+ * Downloads url with an HTTP GET and writes the response body to filename.
+ *
+ * url      - address to request.
+ * filename - file to create or truncate; opened in binary mode so that the
+ *            payload is stored unmodified on every platform.
+ * sReferer - sent as the Referer header when non-empty.
+ *
+ * Returns 1 when the transfer succeeded and the file was closed without
+ * error, 0 otherwise (including when the file cannot be opened).
+ */
+int CurlGetUrlFile(const char *url, const char *filename, const string &sReferer)
+{
+  InitCurlLibraryIfNeeded();
+
+  // Point the output to a file
+  FILE *fp = fopen(filename, "wb");
+  if (fp == NULL)
+    return 0;
+
+  curl_easy_setopt(curlfuncs::curl, CURLOPT_WRITEDATA, static_cast<void *>(fp));    // Set option to write to file
+  curl_easy_setopt(curlfuncs::curl, CURLOPT_URL, url);                              // Set the URL to get
+  if (!sReferer.empty())
+    curl_easy_setopt(curlfuncs::curl, CURLOPT_REFERER, sReferer.c_str());
+  curl_easy_setopt(curlfuncs::curl, CURLOPT_HTTPGET, 1L);                           // long, as required by the varargs API
+
+  int nRet = (curl_easy_perform(curlfuncs::curl) == CURLE_OK) ? 1 : 0;
+
+  curl_easy_setopt(curlfuncs::curl, CURLOPT_WRITEDATA, static_cast<void *>(NULL));  // Set option back to default (string)
+
+  // fclose() flushes buffered data; a failure here means the file is incomplete.
+  if (fclose(fp) != 0)
+    nRet = 0;
+  return nRet;
+}
+
+// Parses one "name=value" field and appends it to the multipart form.
+// Returns false when the field contains no '='; the form is left untouched then.
+static bool CurlAddPostField(const string &sField, struct curl_httppost **formpost,
+                             struct curl_httppost **lastptr)
+{
+  const string::size_type nPos = sField.find("=");
+  if (nPos == string::npos)
+    return false;
+  const string sName = sField.substr(0, nPos);
+  const string sValue = sField.substr(nPos + 1);
+  curl_formadd(formpost, lastptr, CURLFORM_COPYNAME, sName.c_str(), CURLFORM_COPYCONTENTS, sValue.c_str(), CURLFORM_END);
+  return true;
+}
+
+/*
+ * Sends a multipart form POST to url.
+ *
+ * url      - address to post to.
+ * sPost    - form fields as "name=value" pairs separated by "##",
+ *            e.g. "user=bob##lang=en".
+ * sOutput  - receives the response body (see CurlDoPost).
+ * sReferer - sent as the Referer header when non-empty.
+ *
+ * Returns 1 on success, 0 on failure. A field without '=' makes the call
+ * fail with 0 before any request is made; *sOutput is not modified in that
+ * case. Form parts that were already built are released on every path.
+ */
+int CurlPostUrl(const char *url, const string &sPost, string *sOutput, const string &sReferer)
+{
+  InitCurlLibraryIfNeeded();
+
+  struct curl_httppost *formpost = NULL;
+  struct curl_httppost *lastptr = NULL;
+  struct curl_slist *headerlist = NULL;
+
+  // Add the POST variables here
+  bool bFieldsOk = true;
+  string::size_type nStart = 0;
+  while (bFieldsOk) {
+    const string::size_type nEnd = sPost.find("##", nStart);
+    if (nEnd == string::npos) {
+      // Last (or only) field: everything after the final separator.
+      bFieldsOk = CurlAddPostField(sPost.substr(nStart), &formpost, &lastptr);
+      break;
+    }
+    bFieldsOk = CurlAddPostField(sPost.substr(nStart, nEnd - nStart), &formpost, &lastptr);
+    nStart = nEnd + 2;
+  }
+
+  int retval = 0;
+  if (bFieldsOk)
+    retval = CurlDoPost(url, sOutput, sReferer, formpost, headerlist);
+
+  curl_formfree(formpost);
+  curl_slist_free_all(headerlist);
+  return retval;
+}
+
+/*
+ * Posts sPost to url as a raw request body.
+ *
+ * url      - address to post to.
+ * sPost    - body bytes; the explicit size is passed to libcurl so that
+ *            embedded NUL bytes are preserved. The string must stay alive
+ *            for the duration of the call because libcurl does not copy
+ *            CURLOPT_POSTFIELDS data.
+ * sOutput  - receives the response body (see CurlDoPost).
+ * sReferer - sent as the Referer header when non-empty.
+ *
+ * Returns 1 on success, 0 on failure.
+ */
+int CurlPostRaw(const char *url, const string &sPost, string *sOutput, const string &sReferer)
+{
+  InitCurlLibraryIfNeeded();
+
+  struct curl_httppost *formpost = NULL;
+  struct curl_slist *headerlist = NULL;
+
+  curl_easy_setopt(curlfuncs::curl, CURLOPT_POSTFIELDS, sPost.c_str());
+  // A size of 0 would post an empty body; pass the real length (as long,
+  // which is what the varargs API expects for this option).
+  curl_easy_setopt(curlfuncs::curl, CURLOPT_POSTFIELDSIZE, static_cast<long>(sPost.size()));
+
+  const int retval = CurlDoPost(url, sOutput, sReferer, formpost, headerlist);
+
+  curl_formfree(formpost);
+  curl_slist_free_all(headerlist);
+  return retval;
+}
+
+/*
+ * Performs a POST on the shared easy handle and collects the response.
+ *
+ * url        - address to post to.
+ * sOutput    - receives the response body on success, "" on failure.
+ * sReferer   - sent as the Referer header when non-empty.
+ * formpost   - multipart form to send, or NULL when the caller has already
+ *              configured the request body (e.g. via CURLOPT_POSTFIELDS).
+ * headerlist - extra request headers, may be NULL. An empty "Expect:"
+ *              header is appended to suppress "Expect: 100-continue".
+ *
+ * Returns 1 on success, 0 on failure.
+ *
+ * Ownership: when headerlist is NULL on entry the list created here is
+ * freed before returning. Otherwise the new node becomes part of the
+ * caller's list and is released when the caller frees that list.
+ */
+int CurlDoPost(const char *url, string *sOutput, const string &sReferer,
+		struct curl_httppost *formpost, struct curl_slist *headerlist)
+{
+  const bool bOwnsHeaders = (headerlist == NULL);
+  // curl_slist_append() returns NULL on failure and leaves the list intact.
+  struct curl_slist *appended = curl_slist_append(headerlist, "Expect:");
+  if (appended)
+    headerlist = appended;
+
+  // Now do the form post
+  curl_easy_setopt(curlfuncs::curl, CURLOPT_URL, url);
+  if (sReferer != "")
+    curl_easy_setopt(curlfuncs::curl, CURLOPT_REFERER, sReferer.c_str());
+  curl_easy_setopt(curlfuncs::curl, CURLOPT_HTTPHEADER, headerlist);
+  // Setting CURLOPT_HTTPPOST switches the handle to a multipart form post,
+  // so only do it when there really is a form to send.
+  if (formpost)
+    curl_easy_setopt(curlfuncs::curl, CURLOPT_HTTPPOST, formpost);
+
+  curl_easy_setopt(curlfuncs::curl, CURLOPT_WRITEDATA, static_cast<void *>(NULL)); // Set option to write to string
+  curlfuncs::sBuf = "";
+  const CURLcode rc = curl_easy_perform(curlfuncs::curl);
+
+  // Detach the header list from the shared handle before it can be freed.
+  curl_easy_setopt(curlfuncs::curl, CURLOPT_HTTPHEADER, static_cast<struct curl_slist *>(NULL));
+  if (bOwnsHeaders)
+    curl_slist_free_all(headerlist);
+
+  if (rc != CURLE_OK) {
+    // Transfer failed: hand back an empty body.
+    *sOutput = "";
+    return 0;
+  }
+
+  *sOutput = curlfuncs::sBuf;
+  return 1;
+}
+
+/*
+ * Releases the shared easy handle and libcurl's global state, and clears
+ * curlfuncs::bInitialized so that the next wrapper call initialises again.
+ * The handle pointer is reset after cleanup, which makes repeated calls
+ * harmless instead of cleaning up the same handle twice.
+ */
+void FreeCurlLibrary(void)
+{
+  if (curlfuncs::curl) {
+    curl_easy_cleanup(curlfuncs::curl);
+    curlfuncs::curl = NULL;
+  }
+  curl_global_cleanup();
+  curlfuncs::bInitialized = false;
+}
+
+/*
+ * Uses filename both as the file cookies are read from (CURLOPT_COOKIEFILE)
+ * and as the file they are saved to (CURLOPT_COOKIEJAR).
+ * Returns 1 when both options were accepted, 0 as soon as one is rejected.
+ */
+int CurlSetCookieFile(char *filename)
+{
+  InitCurlLibraryIfNeeded();
+
+  CURL *handle = curlfuncs::curl;
+  // && short-circuits, so the jar is only configured once the cookie file was accepted.
+  const bool bBothSet =
+      curl_easy_setopt(handle, CURLOPT_COOKIEFILE, filename) == 0 &&
+      curl_easy_setopt(handle, CURLOPT_COOKIEJAR, filename) == 0;
+  return bBothSet ? 1 : 0;
+}
+
+/*
+ * URL-encodes url and returns the result as a string allocated by libcurl.
+ * Per the libcurl documentation the caller releases it with curl_free().
+ * Returns NULL when url is NULL or when libcurl fails.
+ *
+ * Uses curl_easy_escape() (the replacement for the deprecated
+ * curl_escape()); a length of 0 makes libcurl measure the string itself.
+ */
+char *CurlEscape(const char *url) {
+    if (url == NULL)
+        return NULL;
+    InitCurlLibraryIfNeeded();
+    return curl_easy_escape(curlfuncs::curl, url, 0);
+}
diff --git a/tools/curlfuncs.h b/tools/curlfuncs.h
new file mode 100644
index 0000000..e0f76cf
--- /dev/null
+++ b/tools/curlfuncs.h
@@ -0,0 +1,45 @@
+/*
+Copyright (c) 2002, Mayukh Bose
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+* Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.  
+
+* Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+* Neither the name of Mayukh Bose nor the names of other
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef __CURLFUNCS_H_20020513__
+#define __CURLFUNCS_H_20020513__
+#include <string>
+  using namespace std;
+/*
+ * Convenience wrappers around libcurl. Unless noted otherwise a function
+ * returns 1 on success and 0 on failure.
+ *
+ *   CurlGetUrl        - HTTP GET of url; the response body is stored in *sOutput.
+ *   CurlGetUrlFile    - HTTP GET of url; the response body is written to filename.
+ *   FreeCurlLibrary   - releases the libcurl resources held by these wrappers.
+ *   CurlSetCookieFile - uses filename as both cookie input file and cookie jar.
+ *   CurlPostUrl       - form POST; sPost holds "name=value" pairs separated by "##".
+ *   CurlPostRaw       - POST with sPost handed to libcurl as the request body.
+ *   CurlEscape        - returns url in URL-encoded form, allocated by libcurl
+ *                       (libcurl documents curl_free() for releasing it).
+ *
+ * sReferer, when non-empty, is sent as the Referer header of the request.
+ */
+int CurlGetUrl(const char *url, string *sOutput, const string &sReferer="");
+int CurlGetUrlFile(const char *url, const char *filename, const string &sReferer="");
+void FreeCurlLibrary(void);
+int CurlSetCookieFile(char *filename);
+int CurlPostUrl(const char *url, const string &sPost, string *sOutput, const string &sReferer = "");
+int CurlPostRaw(const char *url, const string &sPost, string *sOutput, const string &sReferer = "");
+char *CurlEscape(const char *url);
+#endif
diff --git a/tools/filesystem.c b/tools/filesystem.c
new file mode 100644
index 0000000..724b28c
--- /dev/null
+++ b/tools/filesystem.c
@@ -0,0 +1,44 @@
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/vfs.h>
+#include <fstream>
+
+using namespace std;
+
+// Tries to create dir (mode 0775) and reports whether it can be opened as a
+// directory afterwards. The result of mkdir() itself is not inspected, so an
+// already existing directory also yields true.
+bool CreateDirectory(string dir) {
+    const char *path = dir.c_str();
+    mkdir(path, 0775);
+
+    // Success is judged solely by being able to open the directory.
+    DIR *handle = opendir(path);
+    if (handle == NULL)
+        return false;
+    closedir(handle);
+    return true;
+}
+
+// Returns true only when filename can be opened and is larger than 500
+// bytes; smaller files are treated as missing because a valid image is
+// expected to exceed that size.
+bool FileExists(string filename) {
+    ifstream input(filename.c_str());
+    if (!input)
+        return false;
+
+    // Jump to the end; the resulting position is the file size.
+    input.seekg(0, input.end);
+    const int size = input.tellg();
+    return size > 500;
+}
+
+// Returns true when statfs() succeeds for dirName, the filesystem type is
+// one of the two accepted ids below, and the path is readable and writable.
+bool CheckDirExists(const char* dirName) {
+    // NOTE(review): these values equal TMPFS_MAGIC and CRAMFS_MAGIC in
+    // <linux/magic.h> - confirm that restricting to them is intended.
+    const long acceptedTypeA = 0x01021994;
+    const long acceptedTypeB = 0x28cd3d45;
+
+    struct statfs info;
+    if (statfs(dirName, &info) == -1)
+        return false;
+
+    const bool typeAccepted = (info.f_type == acceptedTypeA) || (info.f_type == acceptedTypeB);
+    if (!typeAccepted)
+        return false;
+
+    return access(dirName, R_OK|W_OK) != -1;
+}
+
+// Removes filename. The result of remove() is not checked, so a missing
+// file or a failed deletion goes unreported.
+void DeleteFile(string filename) {
+    const char *path = filename.c_str();
+    remove(path);
+}
+\ No newline at end of file
diff --git a/tools/fuzzy.c b/tools/fuzzy.c
new file mode 100644
index 0000000..188f017
--- /dev/null
+++ b/tools/fuzzy.c
@@ -0,0 +1,70 @@
+#include <iterator>
+#include <algorithm>
+#include <vector>
+#include <string>
+#include <sstream>
+#include <iostream>
+ 
+// Weighted edit distance between two sequences, computed with two rolling
+// columns of the dynamic-programming table.
+//   cost(a, b)     - price of replacing element a by element b
+//   cost(empty, b) - price of inserting b
+//   cost(a, empty) - price of deleting a
+// empty defaults to a value-initialised element.
+template<typename T, typename C>
+size_t
+seq_distance(const T& seq1, const T& seq2, const C& cost,
+             const typename T::value_type& empty = typename T::value_type()) {
+  const size_t rows = seq1.size();
+  const size_t cols = seq2.size();
+
+  std::vector<size_t> previous(cols + 1, 0);
+  std::vector<size_t> current(cols + 1, 0);
+
+  // Column for an empty seq1 prefix: build seq2 purely by insertions.
+  for (size_t j = 1; j <= cols; ++j) {
+    previous[j] = previous[j - 1] + cost(empty, seq2[j - 1]);
+  }
+
+  for (size_t i = 0; i < rows; ++i) {
+    // Empty seq2 prefix: everything consumed from seq1 so far is deleted.
+    current[0] = previous[0] + cost(seq1[i], empty);
+
+    for (size_t j = 1; j <= cols; ++j) {
+      const size_t insertion = current[j - 1] + cost(empty, seq2[j - 1]);
+      const size_t deletion = previous[j] + cost(seq1[i], empty);
+      const size_t substitution = previous[j - 1] + cost(seq1[i], seq2[j - 1]);
+      current[j] = std::min(std::min(insertion, deletion), substitution);
+    }
+
+    // The column just filled becomes the reference for the next element.
+    previous.swap(current);
+  }
+
+  return previous[cols];
+}
+ 
+// Cost of turning one letter into another: 0 for identical letters, else 1.
+size_t
+letter_distance(char letter1, char letter2) {
+  if (letter1 == letter2)
+    return 0;
+  return 1;
+}
+ 
+// Edit distance between two words, using letter_distance as the
+// per-character cost.
+size_t
+word_distance(const std::string& word1, const std::string& word2) {
+  const size_t distance = seq_distance(word1, word2, &letter_distance);
+  return distance;
+}
+ 
+// Splits sentence on whitespace and returns its words with a single " "
+// element between neighbouring words. An empty or all-blank sentence yields
+// an empty vector.
+static std::vector<std::string>
+sentence_tokens(const std::string& sentence) {
+  std::vector<std::string> tokens;
+  std::istringstream iss(sentence);
+  std::istream_iterator<std::string> it(iss), end;
+  // Test before dereferencing: for an empty sentence the iterator already
+  // equals end, and reading through it would be undefined behaviour.
+  while (it != end) {
+    tokens.push_back(*it);
+    if (++it != end)
+      tokens.push_back(" ");
+  }
+  return tokens;
+}
+
+// Edit distance between two sentences. Each sentence is treated as a
+// sequence of words separated by " " elements, and word_distance is the cost
+// of replacing one element by another.
+size_t
+sentence_distance(const std::string& sentence1, const std::string& sentence2) {
+  const std::vector<std::string> words1 = sentence_tokens(sentence1);
+  const std::vector<std::string> words2 = sentence_tokens(sentence2);
+  return seq_distance(words1, words2, &word_distance);
+}
+\ No newline at end of file
diff --git a/tools/splitstring.c b/tools/splitstring.c
new file mode 100644
index 0000000..14af577
--- /dev/null
+++ b/tools/splitstring.c
@@ -0,0 +1,32 @@
+using namespace std;
+
+// A string that can split itself into fields. The vector returned by
+// split() is owned by the object and is overwritten by the next call.
+class splitstring : public string {
+    vector<string> parts;
+public:
+    splitstring(const char *s) : string(s) { }
+    vector<string>& split(char delim, int rep=0);
+};
+
+// split: cuts the text at every occurrence of delim and returns the pieces.
+// Empty pieces produced by leading or repeated delimiters are dropped,
+// unless rep == 1, in which case they are kept. A piece left over after the
+// last delimiter is only added when it is non-empty. Only the text up to
+// the first NUL character is considered.
+vector<string>& splitstring::split(char delim, int rep) {
+    parts.clear();
+
+    const bool keepEmpty = (rep == 1);
+    string piece;
+    for (const char *p = c_str(); *p != '\0'; ++p) {
+        if (*p != delim) {
+            piece += *p;
+            continue;
+        }
+        // Delimiter reached: flush what has been collected so far.
+        if (keepEmpty || !piece.empty()) {
+            parts.push_back(piece);
+            piece.clear();
+        }
+    }
+
+    if (!piece.empty())
+        parts.push_back(piece);
+    return parts;
+}
author	louis <louis.braun@gmx.de>	2013-08-23 17:11:38 +0200
committer	louis <louis.braun@gmx.de>	2013-08-23 17:11:38 +0200
commit	21c1ad1005e3415f8448a21f17ed3ba3be9715b8 (patch)
tree	e8cae7353b6047d1d1bd104ad54e5b15d0c85260 /tools
download	vdr-plugin-tvscraper-21c1ad1005e3415f8448a21f17ed3ba3be9715b8.tar.gz vdr-plugin-tvscraper-21c1ad1005e3415f8448a21f17ed3ba3be9715b8.tar.bz2