diff options
| author | louis <louis.braun@gmx.de> | 2013-08-23 17:11:38 +0200 |
|---|---|---|
| committer | louis <louis.braun@gmx.de> | 2013-08-23 17:11:38 +0200 |
| commit | 21c1ad1005e3415f8448a21f17ed3ba3be9715b8 (patch) | |
| tree | e8cae7353b6047d1d1bd104ad54e5b15d0c85260 /tools | |
| download | vdr-plugin-tvscraper-21c1ad1005e3415f8448a21f17ed3ba3be9715b8.tar.gz vdr-plugin-tvscraper-21c1ad1005e3415f8448a21f17ed3ba3be9715b8.tar.bz2 | |
Initial Push Version 0.0.1
Diffstat (limited to 'tools')
| -rw-r--r-- | tools/curlfuncs.cpp | 235 | ||||
| -rw-r--r-- | tools/curlfuncs.h | 45 | ||||
| -rw-r--r-- | tools/filesystem.c | 44 | ||||
| -rw-r--r-- | tools/fuzzy.c | 70 | ||||
| -rw-r--r-- | tools/splitstring.c | 32 |
5 files changed, 426 insertions, 0 deletions
diff --git a/tools/curlfuncs.cpp b/tools/curlfuncs.cpp
new file mode 100644
index 0000000..de4c72f
--- /dev/null
+++ b/tools/curlfuncs.cpp
@@ -0,0 +1,235 @@
+/*
+Copyright (c) 2002, Mayukh Bose
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+* Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+* Neither the name of Mayukh Bose nor the names of other
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/*
+  Change History:
+  11/23/2004 - Removed the #include <unistd.h> line because I didn't
+               need it. Wonder why I had it there in the first place :).
+  10/20/2004 - Publicly released this code.
+*/
+#include <string>
+#include <cstdio>
+#include <cstring>
+#include <curl/curl.h>
+#include <curl/easy.h>
+#include "curlfuncs.h"
+
+#ifndef TRUE
+#define TRUE 1
+#endif
+
+using namespace std;
+
+// Local function prototypes
+int CurlDoPost(const char *url, string *sOutput, const string &sReferer,
+               struct curl_httppost *formpost, struct curl_slist *headerlist);
+
+// State shared by every function in this file: a single easy handle is
+// created on first use and reused for all transfers.
+namespace curlfuncs {
+    string sBuf;               // response body collected when writing to a string
+    bool bInitialized = false; // set once InitCurlLibraryIfNeeded() has succeeded
+    CURL *curl = NULL;         // the shared easy handle
+}
+
+// Write callback installed through CURLOPT_WRITEFUNCTION.
+//
+// ptr/size/nmemb describe the chunk that was received. stream is the current
+// CURLOPT_WRITEDATA value: NULL appends the chunk to curlfuncs::sBuf, any
+// other value is treated as a FILE* and the chunk is written to it.
+//
+// Returns the number of bytes consumed. When fwrite() writes less than it was
+// given, the returned value is smaller than size * nmemb, which makes libcurl
+// abort the transfer with a write error instead of producing a truncated file.
+size_t collect_data(void *ptr, size_t size, size_t nmemb, void *stream)
+{
+    const size_t actualsize = size * nmemb;
+    if (stream == NULL) {
+        curlfuncs::sBuf.append(static_cast<const char *>(ptr), actualsize);
+        return actualsize;
+    }
+    // fwrite() counts complete items; convert back to bytes for libcurl.
+    return fwrite(ptr, size, nmemb, static_cast<FILE *>(stream)) * size;
+}
+
+// Initializes libcurl and the shared easy handle the first time it is called;
+// later calls return immediately.
+// Throws a std::string if the easy handle cannot be created.
+inline void InitCurlLibraryIfNeeded()
+{
+    if (curlfuncs::bInitialized)
+        return;
+
+    curl_global_init(CURL_GLOBAL_ALL);
+    curlfuncs::curl = curl_easy_init();
+    if (!curlfuncs::curl) {
+        curl_global_cleanup(); // balance the curl_global_init() above
+        throw string("Could not create new curl instance");
+    }
+    // curl_easy_setopt() is variadic and reads integer options as long, so
+    // long literals are passed explicitly.
+    curl_easy_setopt(curlfuncs::curl, CURLOPT_NOPROGRESS, 1L); // Do not show progress
+    curl_easy_setopt(curlfuncs::curl, CURLOPT_WRITEFUNCTION, collect_data);
+    curl_easy_setopt(curlfuncs::curl, CURLOPT_WRITEDATA, static_cast<void *>(NULL)); // Set option to write to string
+    curl_easy_setopt(curlfuncs::curl, CURLOPT_FOLLOWLOCATION, 1L);
+    curl_easy_setopt(curlfuncs::curl, CURLOPT_USERAGENT, "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; Mayukh's libcurl wrapper http://www.mayukhbose.com/)");
+    curlfuncs::bInitialized = true;
+}
+
+// Performs an HTTP GET of url and stores the response body in *sOutput.
+// A non-empty sReferer is sent as the Referer header; because the handle is
+// shared, a referer set by an earlier call stays in effect when sReferer is
+// empty.
+// Returns 1 on success; on failure *sOutput is cleared and 0 is returned.
+int CurlGetUrl(const char *url, string *sOutput, const string &sReferer)
+{
+    InitCurlLibraryIfNeeded();
+
+    curl_easy_setopt(curlfuncs::curl, CURLOPT_URL, url); // Set the URL to get
+    if (!sReferer.empty())
+        curl_easy_setopt(curlfuncs::curl, CURLOPT_REFERER, sReferer.c_str());
+    curl_easy_setopt(curlfuncs::curl, CURLOPT_HTTPGET, 1L);
+    curl_easy_setopt(curlfuncs::curl, CURLOPT_WRITEDATA, static_cast<void *>(NULL)); // Set option to write to string
+    curlfuncs::sBuf.clear();
+
+    if (curl_easy_perform(curlfuncs::curl) != CURLE_OK) {
+        // We have an error here mate!
+        sOutput->clear();
+        return 0;
+    }
+    *sOutput = curlfuncs::sBuf;
+    return 1;
+}
+
+// Performs an HTTP GET of url and writes the response body to filename,
+// replacing any existing file.
+// Returns 1 on success, 0 if the file cannot be opened, the transfer fails or
+// the file cannot be flushed and closed. The file is not removed on failure.
+int CurlGetUrlFile(const char *url, const char *filename, const string &sReferer)
+{
+    InitCurlLibraryIfNeeded();
+
+    // Point the output to a file; binary mode keeps the payload unmodified on
+    // platforms that distinguish text from binary streams.
+    FILE *fp = fopen(filename, "wb");
+    if (fp == NULL)
+        return 0;
+
+    curl_easy_setopt(curlfuncs::curl, CURLOPT_WRITEDATA, fp); // Set option to write to file
+    curl_easy_setopt(curlfuncs::curl, CURLOPT_URL, url); // Set the URL to get
+    if (!sReferer.empty())
+        curl_easy_setopt(curlfuncs::curl, CURLOPT_REFERER, sReferer.c_str());
+    curl_easy_setopt(curlfuncs::curl, CURLOPT_HTTPGET, 1L);
+
+    int nRet = (curl_easy_perform(curlfuncs::curl) == CURLE_OK) ? 1 : 0;
+
+    curl_easy_setopt(curlfuncs::curl, CURLOPT_WRITEDATA, static_cast<void *>(NULL)); // Set option back to default (string)
+    if (fclose(fp) != 0)
+        nRet = 0; // buffered data could not be written out
+    return nRet;
+}
+
+// Parses one "name=value" pair and appends it to the multipart form.
+// Returns false if the pair contains no '='.
+static bool AddFormField(const string &sPair, struct curl_httppost **formpost,
+                         struct curl_httppost **lastptr)
+{
+    const string::size_type nPos = sPair.find('=');
+    if (nPos == string::npos)
+        return false;
+    const string sName = sPair.substr(0, nPos);
+    const string sValue = sPair.substr(nPos + 1);
+    curl_formadd(formpost, lastptr, CURLFORM_COPYNAME, sName.c_str(),
+                 CURLFORM_COPYCONTENTS, sValue.c_str(), CURLFORM_END);
+    return true;
+}
+
+// Posts a multipart form to url and stores the response body in *sOutput.
+// sPost holds the fields as "name=value" pairs separated by "##".
+// Returns 1 on success. Returns 0 if a pair has no '=' (nothing is sent and
+// *sOutput is left untouched) or if the transfer fails (*sOutput is cleared).
+int CurlPostUrl(const char *url, const string &sPost, string *sOutput, const string &sReferer)
+{
+    InitCurlLibraryIfNeeded();
+
+    struct curl_httppost *formpost = NULL;
+    struct curl_httppost *lastptr = NULL;
+
+    // Add the POST variables here
+    string::size_type nStart = 0;
+    for (;;) {
+        const string::size_type nEnd = sPost.find("##", nStart);
+        const bool bLast = (nEnd == string::npos);
+        const string sPair = bLast ? sPost.substr(nStart)
+                                   : sPost.substr(nStart, nEnd - nStart);
+        if (!AddFormField(sPair, &formpost, &lastptr)) {
+            curl_formfree(formpost); // release the fields added so far
+            return 0;
+        }
+        if (bLast)
+            break;
+        nStart = nEnd + 2;
+    }
+
+    const int retval = CurlDoPost(url, sOutput, sReferer, formpost, NULL);
+
+    curl_formfree(formpost);
+    return retval;
+}
+
+// Posts sPost verbatim as the request body to url and stores the response
+// body in *sOutput.
+// Returns 1 on success; on failure *sOutput is cleared and 0 is returned.
+int CurlPostRaw(const char *url, const string &sPost, string *sOutput, const string &sReferer)
+{
+    InitCurlLibraryIfNeeded();
+
+    // libcurl does not copy the buffer given to CURLOPT_POSTFIELDS; sPost
+    // outlives the transfer performed by CurlDoPost() below. The explicit
+    // size makes libcurl send the whole string, including any NUL bytes.
+    curl_easy_setopt(curlfuncs::curl, CURLOPT_POSTFIELDS, sPost.data());
+    curl_easy_setopt(curlfuncs::curl, CURLOPT_POSTFIELDSIZE, static_cast<long>(sPost.size()));
+
+    return CurlDoPost(url, sOutput, sReferer, NULL, NULL);
+}
+
+// Shared tail of the POST functions: sends the request prepared by the caller
+// and stores the response body in *sOutput.
+//
+// formpost   multipart form to send, or NULL when the caller has already
+//            configured a raw body through CURLOPT_POSTFIELDS.
+// headerlist extra request headers, or NULL. An empty "Expect:" header is
+//            appended to suppress libcurl's "Expect: 100-continue". A list
+//            passed in stays owned by the caller (the appended entry becomes
+//            part of it); a list created here is freed here.
+//
+// Returns 1 on success; on failure *sOutput is cleared and 0 is returned.
+int CurlDoPost(const char *url, string *sOutput, const string &sReferer,
+               struct curl_httppost *formpost, struct curl_slist *headerlist)
+{
+    const bool bOwnsHeaderList = (headerlist == NULL);
+    // curl_slist_append() returns NULL on failure and leaves the list intact.
+    struct curl_slist *appended = curl_slist_append(headerlist, "Expect:");
+    if (appended != NULL)
+        headerlist = appended;
+
+    // Now do the form post
+    curl_easy_setopt(curlfuncs::curl, CURLOPT_URL, url);
+    if (!sReferer.empty())
+        curl_easy_setopt(curlfuncs::curl, CURLOPT_REFERER, sReferer.c_str());
+    // Setting CURLOPT_HTTPPOST switches the handle to multipart mode, so it
+    // must be skipped for a raw post.
+    if (formpost != NULL)
+        curl_easy_setopt(curlfuncs::curl, CURLOPT_HTTPPOST, formpost);
+    curl_easy_setopt(curlfuncs::curl, CURLOPT_HTTPHEADER, headerlist);
+
+    curl_easy_setopt(curlfuncs::curl, CURLOPT_WRITEDATA, static_cast<void *>(NULL)); // Set option to write to string
+    curlfuncs::sBuf.clear();
+    const CURLcode result = curl_easy_perform(curlfuncs::curl);
+
+    // Detach the header list from the shared handle before it can be freed.
+    curl_easy_setopt(curlfuncs::curl, CURLOPT_HTTPHEADER, static_cast<struct curl_slist *>(NULL));
+    if (bOwnsHeaderList)
+        curl_slist_free_all(headerlist);
+
+    if (result != CURLE_OK) {
+        // We have an error here mate!
+        sOutput->clear();
+        return 0;
+    }
+    *sOutput = curlfuncs::sBuf;
+    return 1;
+}
+
+// Releases the shared easy handle and libcurl's global state. The next call
+// to any function in this file initializes the library again.
+void FreeCurlLibrary(void)
+{
+    if (curlfuncs::curl) {
+        curl_easy_cleanup(curlfuncs::curl);
+        curlfuncs::curl = NULL; // never leave a dangling handle behind
+    }
+    curl_global_cleanup();
+    curlfuncs::bInitialized = false;
+}
+
+// Uses filename both as the file cookies are read from (CURLOPT_COOKIEFILE)
+// and as the file they are saved to (CURLOPT_COOKIEJAR).
+// Returns 1 if both options were accepted, 0 otherwise.
+int CurlSetCookieFile(char *filename)
+{
+    InitCurlLibraryIfNeeded();
+    if (curl_easy_setopt(curlfuncs::curl, CURLOPT_COOKIEFILE, filename) != CURLE_OK)
+        return 0;
+    if (curl_easy_setopt(curlfuncs::curl, CURLOPT_COOKIEJAR, filename) != CURLE_OK)
+        return 0;
+    return 1;
+}
+
+// Returns a URL-encoded copy of url as produced by curl_escape(). Per the
+// libcurl documentation the result must be released with curl_free().
+char *CurlEscape(const char *url) {
+    InitCurlLibraryIfNeeded();
+    return curl_escape(url, static_cast<int>(strlen(url)));
+}
diff --git a/tools/curlfuncs.h b/tools/curlfuncs.h
new file mode 100644
index 0000000..e0f76cf
--- /dev/null
+++ b/tools/curlfuncs.h
@@ -0,0 +1,45 @@
+/*
+Copyright (c) 2002, Mayukh Bose
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+* Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+* Neither the name of Mayukh Bose nor the names of other
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef __CURLFUNCS_H_20020513__
+#define __CURLFUNCS_H_20020513__
+#include <string>
+ using namespace std;
+
+// All functions share one lazily created libcurl easy handle. Functions that
+// return int report success as 1 and failure as 0.
+
+// HTTP GET of url; the response body is stored in *sOutput.
+int CurlGetUrl(const char *url, string *sOutput, const string &sReferer="");
+// HTTP GET of url; the response body is written to the file filename.
+int CurlGetUrlFile(const char *url, const char *filename, const string &sReferer="");
+// Releases the shared handle and libcurl's global state.
+void FreeCurlLibrary(void);
+// Reads cookies from and saves cookies to filename.
+int CurlSetCookieFile(char *filename);
+// Multipart form POST; sPost is "name=value" pairs separated by "##".
+int CurlPostUrl(const char *url, const string &sPost, string *sOutput, const string &sReferer = "");
+// POST with sPost sent verbatim as the request body.
+int CurlPostRaw(const char *url, const string &sPost, string *sOutput, const string &sReferer = "");
+// URL-encodes url; release the result with curl_free().
+char *CurlEscape(const char *url);
+#endif
diff --git a/tools/filesystem.c b/tools/filesystem.c
new file mode 100644
index 0000000..724b28c
--- /dev/null
+++ b/tools/filesystem.c
@@ -0,0 +1,44 @@
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/vfs.h>
+#include <fstream>
+
+using namespace std;
+
+/**
+ * Create the directory `dir` with mode 0775 (as modified by the umask).
+ *
+ * Only the final path component is created; missing parent directories are
+ * not.
+ *
+ * @param dir  path of the directory to create
+ * @return true if `dir` names a directory when the function returns, whether
+ *         it was just created or already existed; false otherwise
+ */
+bool CreateDirectory(string dir) {
+    // The result of mkdir() is deliberately not used: it also fails when the
+    // directory already exists, which counts as success here.
+    mkdir(dir.c_str(), 0775);
+    // Check if successful. stat() is used rather than opendir() so that a
+    // directory without read permission still counts as existing, and so that
+    // only <sys/stat.h> is required.
+    struct stat info;
+    return stat(dir.c_str(), &info) == 0 && S_ISDIR(info.st_mode);
+}
+
+/**
+ * Report whether `filename` can be opened for reading and is larger than
+ * 500 bytes.
+ *
+ * @param filename  path of the file to inspect
+ * @return true if the file opens and its end position is greater than 500;
+ *         false if it cannot be opened or is 500 bytes or smaller
+ */
+bool FileExists(string filename) {
+    ifstream input(filename.c_str());
+    if (!input)
+        return false;
+    // a valid image should be larger than 500 bytes
+    input.seekg(0, ios_base::end);
+    const int sizeInBytes = input.tellg();
+    return sizeInBytes > 500;
+}
+
+/**
+ * Report whether `dirName` is usable: statfs() must succeed on it, the
+ * filesystem type must be one of the two accepted values, and the process
+ * must have read and write access.
+ *
+ * @param dirName  path to check
+ * @return true only if all three conditions hold
+ */
+bool CheckDirExists(const char* dirName) {
+    struct statfs fsInfo;
+    if (statfs(dirName, &fsInfo) == -1)
+        return false;
+    // The accepted values equal TMPFS_MAGIC (0x01021994) and CRAMFS_MAGIC
+    // (0x28cd3d45) from <linux/magic.h>.
+    // NOTE(review): a path on any other filesystem is therefore reported as
+    // missing even when it exists -- confirm this restriction is intended.
+    const bool acceptedFs = (fsInfo.f_type == 0x01021994) || (fsInfo.f_type == 0x28cd3d45);
+    return acceptedFs && access(dirName, R_OK|W_OK) != -1;
+}
+
+/**
+ * Remove `filename` by passing it to remove(). The result of remove() is not
+ * checked, so a failure is not reported to the caller.
+ *
+ * @param filename  path to remove
+ */
+void DeleteFile(string filename) {
+    const char *path = filename.c_str();
+    remove(path);
+}
\ No newline at end of file
diff --git a/tools/fuzzy.c b/tools/fuzzy.c
new file mode 100644
index 0000000..188f017
--- /dev/null
+++ b/tools/fuzzy.c
@@ -0,0 +1,70 @@
+#include <iterator>
+#include <algorithm>
+#include <vector>
+#include <string>
+#include <sstream>
+#include <iostream>
+
+// Generic edit distance between two sequences.
+//
+// cost(a, b) is the price of turning element a into element b. Passing
+// `empty` as the first argument prices an insertion, passing it as the second
+// argument prices a deletion. The result is the cheapest total over all ways
+// of editing seq1 into seq2.
+//
+// Only two columns of the dynamic-programming table are kept at any time, so
+// memory use is proportional to seq2.size().
+template<typename T, typename C>
+size_t
+seq_distance(const T& seq1, const T& seq2, const C& cost,
+             const typename T::value_type& empty = typename T::value_type()) {
+    const size_t size1 = seq1.size();
+    const size_t size2 = seq2.size();
+
+    std::vector<size_t> curr_col(size2 + 1);
+    std::vector<size_t> prev_col(size2 + 1);
+
+    // Prime the previous column for use in the following loop:
+    // entry j is the cost of building the first j elements of seq2 from nothing.
+    prev_col[0] = 0;
+    for (size_t idx2 = 0; idx2 < size2; ++idx2) {
+        prev_col[idx2 + 1] = prev_col[idx2] + cost(empty, seq2[idx2]);
+    }
+
+    for (size_t idx1 = 0; idx1 < size1; ++idx1) {
+        // Top entry: delete everything of seq1 seen so far.
+        curr_col[0] = prev_col[0] + cost(seq1[idx1], empty);
+
+        for (size_t idx2 = 0; idx2 < size2; ++idx2) {
+            curr_col[idx2 + 1] = std::min(std::min(
+                curr_col[idx2] + cost(empty, seq2[idx2]),       // insertion
+                prev_col[idx2 + 1] + cost(seq1[idx1], empty)),  // deletion
+                prev_col[idx2] + cost(seq1[idx1], seq2[idx2])); // substitution
+        }
+
+        // The column just filled becomes the previous one for the next element.
+        curr_col.swap(prev_col);
+    }
+
+    return prev_col[size2];
+}
+
+// Cost of replacing one character by another: 0 if they are equal, else 1.
+size_t
+letter_distance(char letter1, char letter2) {
+    return letter1 != letter2 ? 1 : 0;
+}
+
+// Edit distance between two words, counted in single-character edits.
+size_t
+word_distance(const std::string& word1, const std::string& word2) {
+    return seq_distance(word1, word2, &letter_distance);
+}
+
+// Splits a sentence on whitespace and returns its words with a single " "
+// element between neighbouring words. A sentence without any word yields an
+// empty vector.
+static std::vector<std::string>
+sentence_tokens(const std::string& sentence) {
+    std::vector<std::string> tokens;
+    std::istringstream iss(sentence);
+    std::string word;
+    while (iss >> word) {
+        if (!tokens.empty())
+            tokens.push_back(" ");
+        tokens.push_back(word);
+    }
+    return tokens;
+}
+
+// Edit distance between two sentences. Each sentence is treated as a sequence
+// of words and separators, and replacing one element by another costs their
+// word_distance(). Empty and whitespace-only sentences are handled without
+// reading past the end of the input.
+size_t
+sentence_distance(const std::string& sentence1, const std::string& sentence2) {
+    const std::vector<std::string> words1 = sentence_tokens(sentence1);
+    const std::vector<std::string> words2 = sentence_tokens(sentence2);
+    return seq_distance(words1, words2, &word_distance);
+}
\ No newline at end of file
diff --git a/tools/splitstring.c b/tools/splitstring.c
new file mode 100644
index 0000000..14af577
--- /dev/null
+++ b/tools/splitstring.c
@@ -0,0 +1,32 @@
+#include <string>
+#include <vector>
+
+using namespace std;
+
+// A std::string that can split itself into fields. The fields of the most
+// recent split() call are stored in the object.
+class splitstring : public string {
+    vector<string> flds;
+public:
+    splitstring(const char *s) : string(s) { }
+    vector<string>& split(char delim, int rep=0);
+};
+
+// split: receives a char delimiter; returns a vector of strings
+// By default ignores repeated delimiters, unless argument rep == 1.
+//
+// With rep == 1 every delimiter ends a field, so leading and repeated
+// delimiters produce empty fields. A delimiter at the very end of the string
+// never adds a trailing empty field.
+// The returned reference points at storage inside this object and is
+// overwritten by the next call to split().
+vector<string>& splitstring::split(char delim, int rep) {
+    flds.clear();
+    string buf;
+    // Walk the string itself rather than a copy made from data(), so that
+    // characters after an embedded NUL are not lost.
+    for (string::size_type i = 0; i < length(); ++i) {
+        const char c = (*this)[i];
+        if (c != delim) {
+            buf += c;
+        } else if (rep == 1 || !buf.empty()) {
+            flds.push_back(buf);
+            buf.clear();
+        }
+    }
+    if (!buf.empty())
+        flds.push_back(buf);
+    return flds;
+}
