path: root/scraper
diff options
Diffstat (limited to 'scraper')
18 files changed, 1872 insertions, 0 deletions
diff --git a/scraper/themoviedbscraper/moviedbactor.c b/scraper/themoviedbscraper/moviedbactor.c
new file mode 100644
index 0000000..4e81af2
--- /dev/null
+++ b/scraper/themoviedbscraper/moviedbactor.c
@@ -0,0 +1,49 @@
+#include <string>
+#include <sstream>
+#include <vector>
+#include <iostream>
+#include <jansson.h>
+#include "moviedbactor.h"
+#include "../../tools/stringhelpers.h"
+using namespace std;
+// Keeps a copy of the raw cast JSON and of the base URL that every actor's
+// profile path is appended to in ParseJSON().
+cMovieDbActors::cMovieDbActors(string json, string actorsBaseUrl) {
+    this->json = json;
+    this->actorsBaseUrl = actorsBaseUrl;
+}
+// Nothing to release here; the parsed actors are handed to the caller's vector.
+cMovieDbActors::~cMovieDbActors() {
+}
+// Parses the "cast" array of the stored JSON and appends one heap-allocated
+// cMovieDBActor per usable entry to *actors. The caller owns the new objects
+// and must delete them.
+// Entries that are not objects, or whose id/name/character/profile_path do not
+// have the expected JSON type (for instance a JSON null), are skipped instead of
+// aborting the whole parse, so one incomplete entry no longer drops the rest.
+void cMovieDbActors::ParseJSON(vector<cMovieDBActor*> *actors) {
+    if (!actors)
+        return;
+    cJsonLoader jActors(json.c_str());
+    if (!jActors.isObject())
+        return;
+    json_t *cast = jActors.objectByName("cast");
+    if (!json_is_array(cast))
+        return;
+    size_t numActors = json_array_size(cast);
+    for (size_t i = 0; i < numActors; i++) {
+        json_t *jActor = json_array_get(cast, i);
+        if (!json_is_object(jActor))
+            continue;
+        json_t *jId = json_object_get(jActor, "id");
+        json_t *jName = json_object_get(jActor, "name");
+        json_t *jRole = json_object_get(jActor, "character");
+        json_t *jPath = json_object_get(jActor, "profile_path");
+        if (!json_is_integer(jId) || !json_is_string(jName) || !json_is_string(jRole) || !json_is_string(jPath))
+            continue;
+        cMovieDBActor *actor = new cMovieDBActor();
+        actor->id = json_integer_value(jId);
+        actor->name = json_string_value(jName);
+        actor->role = json_string_value(jRole);
+        // profile_path is relative; prefix it with the configured image base URL
+        actor->thumbUrl = actorsBaseUrl + json_string_value(jPath);
+        actors->push_back(actor);
+    }
+}
diff --git a/scraper/themoviedbscraper/moviedbactor.h b/scraper/themoviedbscraper/moviedbactor.h
new file mode 100644
index 0000000..1b51682
--- /dev/null
+++ b/scraper/themoviedbscraper/moviedbactor.h
@@ -0,0 +1,44 @@
+using namespace std;
+// --- cMovieDBActor -------------------------------------------------------------
+// Plain record for one cast member. All members are public because
+// cMovieDbActors::ParseJSON() fills them in directly.
+class cMovieDBActor {
+public:
+    // Starts with empty strings, id 0 and a default thumbnail size of 370x556.
+    cMovieDBActor(void) {
+        id = 0;
+        thumbUrl = "";
+        name = "";
+        role = "";
+        width = 370;
+        height = 556;
+    };
+    int id;
+    string thumbUrl;
+    int width;
+    int height;
+    string name;
+    string role;
+    // Prints id, name, role and thumbnail URL to stdout.
+    void Dump(void) {
+        cout << "id: " << id << endl;
+        cout << "name: " << name << endl;
+        cout << "role: " << role << endl;
+        cout << "thumbnail: " << thumbUrl << endl;
+    };
+};
+// --- cMovieDBActors -------------------------------------------------------------
+// Turns a cast JSON document into cMovieDBActor objects.
+class cMovieDbActors {
+    string json;            // raw JSON handed to the constructor
+    string actorsBaseUrl;   // prefix for each actor's profile path
+public:
+    cMovieDbActors(string json, string actorsBaseUrl);
+    virtual ~cMovieDbActors(void);
+    // Appends newly allocated actors to *actors; the caller owns them.
+    void ParseJSON(vector<cMovieDBActor*> *actors);
+};
diff --git a/scraper/themoviedbscraper/moviedbmovie.c b/scraper/themoviedbscraper/moviedbmovie.c
new file mode 100644
index 0000000..8060930
--- /dev/null
+++ b/scraper/themoviedbscraper/moviedbmovie.c
@@ -0,0 +1,255 @@
+#include <string>
+#include <sstream>
+#include <vector>
+#include <algorithm>
+#include <iostream>
+#include <jansson.h>
+#include "../../lib/curl.h"
+#include "../../tools/fuzzy.h"
+#include "../../tools/stringhelpers.h"
+#include "moviedbmovie.h"
+using namespace std;
+// Stores the raw JSON and resets every field to an empty/zero default.
+// Image dimensions default to 1280x720 (backdrop) and 500x750 (poster).
+cMovieDbMovie::cMovieDbMovie(string json) {
+    this->json = json;
+    // id was previously left uninitialised; ReadActors() builds a URL from it
+    id = 0;
+    title = "";
+    originalTitle = "";
+    tagline = "";
+    overview = "";
+    backdropPath = "";
+    posterPath = "";
+    adult = false;
+    collectionID = 0;
+    collectionName = "";
+    collectionPosterPath = "";
+    collectionBackdropPath = "";
+    budget = 0;
+    revenue = 0;
+    genres = "";
+    homepage = "";
+    imdbid = "";
+    releaseDate = "";
+    runtime = 0;
+    popularity = 0.0;
+    voteAverage = 0.0;
+    currentActor = 0;
+    backdropWidth = 1280;
+    backdropHeight = 720;
+    posterWidth = 500;
+    posterHeight = 750;
+}
+// Deletes every actor object held in the actors vector and empties it.
+cMovieDbMovie::~cMovieDbMovie() {
+    for (std::vector<cMovieDBActor*>::const_iterator it = actors.begin(); it != actors.end(); ++it) {
+        delete *it;
+    }
+    actors.clear();
+}
+// Fills the movie fields from the stored JSON document. Every key is optional:
+// a missing key or a value of an unexpected JSON type leaves the corresponding
+// field at its constructor default. Image paths are prefixed with the poster /
+// backdrop base URLs set through SetPosterBaseUrl() / SetBackdropBaseUrl().
+void cMovieDbMovie::ParseJSON(void) {
+    cJsonLoader jMovie(json.c_str());
+    if (!jMovie.isObject()) {
+        return;
+    }
+    json_t *jTitle = jMovie.objectByName("title");
+    if (json_is_string(jTitle)) {
+        title = json_string_value(jTitle);
+    }
+    json_t *jOriginalTitle = jMovie.objectByName("original_title");
+    if (json_is_string(jOriginalTitle)) {
+        originalTitle = json_string_value(jOriginalTitle);
+    }
+    json_t *jOverview = jMovie.objectByName("overview");
+    if (json_is_string(jOverview)) {
+        overview = json_string_value(jOverview);
+    }
+    json_t *jBackdrop = jMovie.objectByName("backdrop_path");
+    if (json_is_string(jBackdrop)) {
+        backdropPath = backdropBaseUrl + json_string_value(jBackdrop);
+    }
+    json_t *jPoster = jMovie.objectByName("poster_path");
+    if (json_is_string(jPoster)) {
+        posterPath = posterBaseUrl + json_string_value(jPoster);
+    }
+    json_t *jAdult = jMovie.objectByName("adult");
+    if (json_is_true(jAdult)) {
+        adult = true;
+    }
+    json_t *collection = jMovie.objectByName("belongs_to_collection");
+    if (json_is_object(collection)) {
+        json_t *colID = json_object_get(collection, "id");
+        if (json_is_integer(colID)) {
+            collectionID = (int)json_integer_value(colID);
+        }
+        json_t *colName = json_object_get(collection, "name");
+        // Check colName itself; the old code tested jPoster here, so the name
+        // was dropped for movies without a poster and json_string_value()'s
+        // NULL result was assigned when the name was not a string.
+        if (json_is_string(colName)) {
+            collectionName = json_string_value(colName);
+        }
+        json_t *colPoster = json_object_get(collection, "poster_path");
+        if (json_is_string(colPoster)) {
+            collectionPosterPath = posterBaseUrl + json_string_value(colPoster);
+        }
+        json_t *colBackdrop = json_object_get(collection, "backdrop_path");
+        if (json_is_string(colBackdrop)) {
+            collectionBackdropPath = backdropBaseUrl + json_string_value(colBackdrop);
+        }
+    }
+    // NOTE(review): budget and revenue are narrowed to int; values above
+    // INT_MAX would not fit - confirm whether the fields should be wider.
+    json_t *jBudget = jMovie.objectByName("budget");
+    if (json_is_integer(jBudget)) {
+        budget = (int)json_integer_value(jBudget);
+    }
+    json_t *aGenres = jMovie.objectByName("genres");
+    if (json_is_array(aGenres)) {
+        size_t numGenres = json_array_size(aGenres);
+        for (size_t res = 0; res < numGenres; res++) {
+            json_t *result = json_array_get(aGenres, res);
+            if (!json_is_object(result))
+                continue;
+            json_t *jGenre = json_object_get(result, "name");
+            if (!json_is_string(jGenre))
+                continue;
+            // Separator goes in front of every genre but the first, so a
+            // skipped trailing entry can no longer leave a dangling " | ".
+            if (!genres.empty())
+                genres += " | ";
+            genres += json_string_value(jGenre);
+        }
+    }
+    json_t *jHomepage = jMovie.objectByName("homepage");
+    if (json_is_string(jHomepage)) {
+        homepage = json_string_value(jHomepage);
+    }
+    json_t *jIMDB = jMovie.objectByName("imdb_id");
+    if (json_is_string(jIMDB)) {
+        imdbid = json_string_value(jIMDB);
+    }
+    // json_is_number accepts both integer and real JSON values, so a
+    // popularity or vote such as 7 (no fraction) is no longer ignored.
+    json_t *jPopularity = jMovie.objectByName("popularity");
+    if (json_is_number(jPopularity)) {
+        popularity = json_number_value(jPopularity);
+    }
+    json_t *jReleaseDate = jMovie.objectByName("release_date");
+    if (json_is_string(jReleaseDate)) {
+        releaseDate = json_string_value(jReleaseDate);
+    }
+    json_t *jRevenue = jMovie.objectByName("revenue");
+    if (json_is_integer(jRevenue)) {
+        revenue = (int)json_integer_value(jRevenue);
+    }
+    json_t *jRuntime = jMovie.objectByName("runtime");
+    if (json_is_integer(jRuntime)) {
+        runtime = (int)json_integer_value(jRuntime);
+    }
+    json_t *jVote = jMovie.objectByName("vote_average");
+    if (json_is_number(jVote)) {
+        voteAverage = json_number_value(jVote);
+    }
+    json_t *jTagline = jMovie.objectByName("tagline");
+    if (json_is_string(jTagline)) {
+        tagline = json_string_value(jTagline);
+    }
+}
+// Interprets the stored JSON as a search response, records (id, distance) for
+// every entry of its "results" array and returns the id chosen by
+// FindBestResult(). The distance is sentence_distance() between the lower-cased
+// result title and the lower-cased search string.
+// Returns -1 when the document has no "results" array or no usable entry.
+int cMovieDbMovie::ParseJSONForMovieId(string movieSearchString) {
+    //convert searchstring to lower case
+    transform(movieSearchString.begin(), movieSearchString.end(), movieSearchString.begin(), ::tolower);
+    cJsonLoader root(json.c_str());
+    if (!root.isObject()) {
+        return -1;
+    }
+    json_t *results = root.objectByName("results");
+    if (!json_is_array(results)) {
+        return -1;
+    }
+    size_t numResults = json_array_size(results);
+    for (size_t res = 0; res < numResults; res++) {
+        json_t *result = json_array_get(results, res);
+        // Skip malformed entries rather than discarding the candidates
+        // that were already collected.
+        if (!json_is_object(result))
+            continue;
+        json_t *title = json_object_get(result, "title");
+        if (!json_is_string(title))
+            continue;
+        string resultTitle = json_string_value(title);
+        //convert result to lower case
+        transform(resultTitle.begin(), resultTitle.end(), resultTitle.begin(), ::tolower);
+        json_t *jId = json_object_get(result, "id");
+        if (json_is_integer(jId)) {
+            searchResult sRes;
+            // the assignment target was missing here ("= id;")
+            sRes.id = static_cast<int>(json_integer_value(jId));
+            sRes.distance = sentence_distance(resultTitle, movieSearchString);
+            resultSet.push_back(sRes);
+        }
+    }
+    return FindBestResult();
+}
+// Returns the id of the resultSet entry with the smallest distance, or -1 when
+// resultSet is empty. On equal distances the earliest entry wins.
+int cMovieDbMovie::FindBestResult(void) {
+    int resID = -1;
+    int bestMatch = -1;
+    for (size_t i = 0; i < resultSet.size(); i++) {
+        // the first entry is the baseline; later ones must be strictly closer
+        if (i == 0 || resultSet[i].distance < bestMatch) {
+            bestMatch = resultSet[i].distance;
+            resID = resultSet[i].id;
+        }
+    }
+    return resID;
+}
+// Downloads <baseUrl>/movie/<id>/casts and appends the parsed actors to the
+// actors vector. Does nothing when the download fails.
+void cMovieDbMovie::ReadActors(void) {
+    stringstream url;
+    url << baseUrl << "/movie/" << id << "/casts?api_key=" << apiKey;
+    string actorsJSON;
+    if (curl.GetUrl(url.str().c_str(), &actorsJSON)) {
+        cMovieDbActors act(actorsJSON, actorBaseUrl);
+        act.ParseJSON(&actors);
+    }
+}
+// Cursor-style accessor: returns the actor at currentActor and advances the
+// cursor, or NULL once all actors were handed out (or none exist). The
+// returned object stays owned by this movie.
+cMovieDBActor *cMovieDbMovie::GetActor(void) {
+    int numActors = actors.size();
+    if ((numActors < 1) || (currentActor >= numActors))
+        return NULL;
+    return actors[currentActor++];
+}
+// Prints every movie field, followed by each actor's Dump(), to stdout.
+void cMovieDbMovie::Dump(void) {
+    cout << "-------------- MOVIE DUMP ---------------" << endl;
+    cout << "Title: " << title << endl;
+    cout << "Original Title: " << originalTitle << endl;
+    cout << "Tagline: " << tagline << endl;
+    cout << "Overview: " << overview << endl;
+    cout << "BackdropPath: " << backdropPath << endl;
+    cout << "PosterPath: " << posterPath << endl;
+    cout << "Adult: " << (adult?"true":"false") << endl;
+    // collectionID stays 0 when the movie has no collection
+    if (!collectionID) {
+        cout << "Movie belongs not to a collection" << endl;
+    } else {
+        cout << "CollectionID: " << collectionID << endl;
+        cout << "Collection Name: " << collectionName << endl;
+        cout << "Collection BackdropPath: " << collectionBackdropPath << endl;
+        cout << "Collection PosterPath: " << collectionPosterPath << endl;
+    }
+    cout << "Budget: " << budget << "$" << endl;
+    cout << "Revenue: " << revenue << "$" << endl;
+    cout << "Genres: " << genres << endl;
+    cout << "Homepage: " << homepage << endl;
+    cout << "imdbID: " << imdbid << endl;
+    cout << "Release Date: " << releaseDate << endl;
+    cout << "Runtime: " << runtime << " min" << endl;
+    cout << "Popularity: " << popularity << endl;
+    cout << "Vote Average: " << voteAverage << endl;
+    int numActors = actors.size();
+    cout << "--------------------- " << numActors << " actors found -----------------" << endl;
+    for (int i=0; i<numActors; i++) {
+        actors[i]->Dump();
+    }
+}
diff --git a/scraper/themoviedbscraper/moviedbmovie.h b/scraper/themoviedbscraper/moviedbmovie.h
new file mode 100644
index 0000000..9e9c2fd
--- /dev/null
+++ b/scraper/themoviedbscraper/moviedbmovie.h
@@ -0,0 +1,68 @@
+#include "moviedbactor.h"
+using namespace std;
+// One search candidate: the movie id and its distance to the search string
+// (smaller is a better match, see cMovieDbMovie::FindBestResult()).
+struct searchResult {
+    int id;
+    int distance;
+};
+// --- cMovieDbMovie -------------------------------------------------------------
+// One movie: parses its details and search responses from JSON and loads its cast.
+class cMovieDbMovie {
+    string json;                     // raw JSON handed to the constructor
+    vector<searchResult> resultSet;  // candidates gathered by ParseJSONForMovieId()
+    string apiKey;
+    string baseUrl;
+    string posterBaseUrl;
+    string backdropBaseUrl;
+    string actorBaseUrl;
+    vector<cMovieDBActor*> actors;   // owned; deleted in the destructor
+    int currentActor;                // cursor used by GetActor()
+    int FindBestResult(void);
+public:
+    // Public from here on: cMovieDBScraper constructs the object and writes id directly.
+    cMovieDbMovie(string json);
+    virtual ~cMovieDbMovie(void);
+    int id;
+    string title;
+    string originalTitle;
+    string tagline;
+    string overview;
+    string backdropPath;
+    int backdropWidth;
+    int backdropHeight;
+    string posterPath;
+    int posterWidth;
+    int posterHeight;
+    bool adult;
+    int collectionID;
+    string collectionName;
+    string collectionPosterPath;
+    string collectionBackdropPath;
+    int budget;
+    int revenue;
+    string genres;
+    string homepage;
+    string imdbid;
+    string releaseDate;
+    int runtime;
+    float popularity;
+    float voteAverage;
+    int ParseJSONForMovieId(string movieSearchString);
+    void ParseJSON(void);
+    void SetApiKey(string apiKey) { this->apiKey = apiKey; };
+    void SetBaseUrl(string baseUrl) { this->baseUrl = baseUrl; };
+    void SetPosterBaseUrl(string url) { posterBaseUrl = url; };
+    void SetBackdropBaseUrl(string url) { backdropBaseUrl = url; };
+    void SetActorBaseUrl(string url) { actorBaseUrl = url; };
+    void ReadActors(void);
+    cMovieDBActor *GetActor(void);
+    void Dump();
+};
diff --git a/scraper/themoviedbscraper/themoviedbscraper.c b/scraper/themoviedbscraper/themoviedbscraper.c
new file mode 100644
index 0000000..62263a9
--- /dev/null
+++ b/scraper/themoviedbscraper/themoviedbscraper.c
@@ -0,0 +1,103 @@
+#include <string>
+#include <sstream>
+#include <vector>
+#include <map>
+#include <algorithm>
+#include <iostream>
+#include <jansson.h>
+#include "../../lib/curl.h"
+#include "../../tools/stringhelpers.h"
+#include "themoviedbscraper.h"
+using namespace std;
+// Stores the query language and sets the API key and the image size tokens
+// that are appended to the image base URL in ReadMovie().
+cMovieDBScraper::cMovieDBScraper(string language) {
+    // NOTE(review): the API key is hard-coded in the source.
+    apiKey = "abb01b5a277b9c2c60ec0302d83c5ee9";
+    this->language = language;
+    // NOTE(review): baseURL is empty in this patch, so every request URL
+    // starts at "/..." - confirm the intended API endpoint.
+    baseURL = "";
+    posterSize = "w500";
+    backdropSize = "w1280";
+    actorthumbSize = "h632";
+}
+// The destructor has nothing to release.
+cMovieDBScraper::~cMovieDBScraper() {
+}
+// Searches for movieName (optionally restricted to year) and returns a newly
+// allocated, parsed movie for the best match. Returns NULL when the search
+// yields no id >= 1 or the movie cannot be downloaded. The caller owns the result.
+cMovieDbMovie *cMovieDBScraper::Scrap(string movieName, string year) {
+    int movieID = SearchMovie(movieName, year);
+    if (movieID < 1) {
+        return NULL;
+    }
+    // ReadMovie() already returns NULL on failure
+    return ReadMovie(movieID);
+}
+// Downloads <baseURL>/configuration and extracts the image base URL from it.
+// Returns false when the download or the parsing fails.
+bool cMovieDBScraper::Connect(void) {
+    stringstream url;
+    url << baseURL << "/configuration?api_key=" << apiKey;
+    string configJSON;
+    if (curl.GetUrl(url.str().c_str(), &configJSON)) {
+        return parseJSON(configJSON);
+    }
+    return false;
+}
+// Reads images.base_url from the configuration JSON into imageUrl.
+// Returns false (leaving imageUrl untouched) when the document is not an
+// object, has no "images" object, or "base_url" is not a string.
+bool cMovieDBScraper::parseJSON(string jsonString) {
+    cJsonLoader root(jsonString.c_str());
+    if (!root.isObject()) {
+        return false;
+    }
+    json_t *images = root.objectByName("images");
+    if (!json_is_object(images)) {
+        return false;
+    }
+    json_t *imgUrl = json_object_get(images, "base_url");
+    if (!json_is_string(imgUrl)) {
+        return false;
+    }
+    imageUrl = json_string_value(imgUrl);
+    return true;
+}
+// Queries <baseURL>/search/movie for movieName (URL-escaped), optionally
+// filtered by year, and returns the id of the best matching result.
+// Returns -1 when escaping or the download fails, or when nothing matches.
+int cMovieDBScraper::SearchMovie(string movieName, string year) {
+    stringstream url;
+    string movieJSON;
+    int movieID = -1;
+    char* escUrl = curl.EscapeUrl(movieName.c_str());
+    // a NULL char* must not be streamed into the URL
+    if (!escUrl)
+        return movieID;
+    url << baseURL << "/search/movie?api_key=" << apiKey << "&query=" << escUrl << "&language=" << language.c_str();
+    if (year.size() > 0)
+        url << "&year=" << year.c_str();
+    curl.Free(escUrl);
+    if (curl.GetUrl(url.str().c_str(), &movieJSON)) {
+        cMovieDbMovie movie(movieJSON);
+        movieID = movie.ParseJSONForMovieId(movieName);
+    }
+    return movieID;
+}
+// Downloads <baseURL>/movie/<movieID> and returns a newly allocated movie with
+// id, API settings and image base URLs set and the JSON parsed.
+// Returns NULL when the download fails. The caller owns the result.
+cMovieDbMovie *cMovieDBScraper::ReadMovie(int movieID) {
+    stringstream url;
+    url << baseURL << "/movie/" << movieID << "?api_key=" << apiKey << "&language=" << language.c_str();
+    string movieJSON;
+    cMovieDbMovie *movie = NULL;
+    if (curl.GetUrl(url.str().c_str(), &movieJSON)) {
+        movie = new cMovieDbMovie(movieJSON);
+        movie->id = movieID;
+        movie->SetBaseUrl(baseURL);
+        movie->SetApiKey(apiKey);
+        // the base URLs must be set before ParseJSON(), which prefixes paths with them
+        movie->SetPosterBaseUrl(imageUrl + posterSize);
+        movie->SetBackdropBaseUrl(imageUrl + backdropSize);
+        movie->SetActorBaseUrl(imageUrl + actorthumbSize);
+        movie->ParseJSON();
+    }
+    return movie;
+}
diff --git a/scraper/themoviedbscraper/themoviedbscraper.h b/scraper/themoviedbscraper/themoviedbscraper.h
new file mode 100644
index 0000000..d2f449e
--- /dev/null
+++ b/scraper/themoviedbscraper/themoviedbscraper.h
@@ -0,0 +1,31 @@
+#include "moviedbmovie.h"
+#include "moviedbactor.h"
+using namespace std;
+// --- cMovieDBScraper -------------------------------------------------------------
+// Front end for the movie database: Connect() once, then Scrap()/ReadMovie().
+class cMovieDBScraper {
+    string apiKey;
+    string language;
+    string baseURL;
+    string imageUrl;         // image base URL read by parseJSON()
+    string posterSize;
+    string backdropSize;
+    string actorthumbSize;
+    bool parseJSON(string jsonString);
+    int SearchMovie(string movieName, string year);
+public:
+    cMovieDBScraper(string language);
+    virtual ~cMovieDBScraper(void);
+    bool Connect(void);
+    // Both return a newly allocated movie owned by the caller, or NULL.
+    cMovieDbMovie *Scrap(string movieName, string year="");
+    cMovieDbMovie *ReadMovie(int movieID);
+};
diff --git a/scraper/thetvdbscraper/thetvdbscraper.c b/scraper/thetvdbscraper/thetvdbscraper.c
new file mode 100644
index 0000000..e83690c
--- /dev/null
+++ b/scraper/thetvdbscraper/thetvdbscraper.c
@@ -0,0 +1,188 @@
+#include "../../lib/curl.h"
+#include "thetvdbscraper.h"
+using namespace std;
+// Stores the query language, initialises the libxml2 parser and leaves
+// mirrors unset until Connect() succeeds.
+cTVDBScraper::cTVDBScraper(string language) {
+    // NOTE(review): the API key is hard-coded in the source.
+    apiKey = "E9DBB94CA50832ED";
+    // NOTE(review): baseURL is empty in this patch - confirm the intended endpoint.
+    baseURL = "";
+    this->language = language;
+    mirrors = NULL;
+    xmlInitParser();
+}
+// Releases the mirror list created by Connect(), if any.
+cTVDBScraper::~cTVDBScraper() {
+    if (mirrors)
+        delete mirrors;
+}
+// Looks up seriesName and returns a newly allocated cTVDBSeries for the found
+// id, or NULL when ReadSeries() yields 0. The caller owns the result.
+cTVDBSeries *cTVDBScraper::ScrapInitial(string seriesName) {
+    cTVDBSeries *series = NULL;
+    int seriesID = ReadSeries(seriesName);
+    if (seriesID) {
+        series = new cTVDBSeries(seriesID, language, apiKey, mirrors);
+    }
+    return series;
+}
+// Returns a newly allocated series object for seriesID; the caller owns it.
+// The object receives this scraper's mirrors pointer.
+cTVDBSeries *cTVDBScraper::GetSeries(int seriesID) {
+    return new cTVDBSeries(seriesID, language, apiKey, mirrors);
+}
+// Returns a newly allocated episode object for episodeID; the caller owns it.
+// The object receives this scraper's mirrors pointer.
+cTVDBEpisode *cTVDBScraper::GetEpisode(int episodeID) {
+    return new cTVDBEpisode(episodeID, language, apiKey, mirrors);
+}
+// Downloads <baseURL>/api/<apiKey>/mirrors.xml and parses it into a new
+// cTVDBMirrors object. Returns false when the download or the parsing fails.
+// NOTE(review): calling this twice replaces mirrors without freeing the old
+// object; it is not deleted here because series/episode objects created
+// earlier were handed that pointer.
+bool cTVDBScraper::Connect(void) {
+    stringstream url;
+    url << baseURL << "/api/" << apiKey << "/mirrors.xml";
+    string mirrorsXML;
+    bool ok = false;
+    if (curl.GetUrl(url.str().c_str(), &mirrorsXML)) {
+        mirrors = new cTVDBMirrors();
+        ok = mirrors->ParseXML(mirrorsXML);
+    }
+    return ok;
+}
+// Downloads the server-time document and returns the integer stored in the
+// first <Time> child of the <Items> root.
+// Returns 0 when the download fails, the XML does not have that shape, or
+// <Time> has no text (previously a NULL string was passed to atoi()).
+int cTVDBScraper::GetServerTime(void) {
+    // NOTE(review): the URL literal is empty in this patch - confirm the endpoint.
+    string url = "";
+    string serverTimeXML;
+    if (!curl.GetUrl(url.c_str(), &serverTimeXML)) {
+        return 0;
+    }
+    xmlDoc *doc = SetXMLDoc(serverTimeXML);
+    if (doc == NULL)
+        return 0;
+    //Root Element has to be <Items>
+    xmlNode *root = xmlDocGetRootElement(doc);
+    if (!(root && !xmlStrcmp(root->name, (const xmlChar *)"Items"))) {
+        xmlFreeDoc(doc);
+        return 0;
+    }
+    int serverTime = 0;
+    for (xmlNode *cur_node = root->children; cur_node; cur_node = cur_node->next) {
+        if ((cur_node->type == XML_ELEMENT_NODE) && !xmlStrcmp(cur_node->name, (const xmlChar *)"Time")) {
+            xmlChar *node_content = xmlNodeListGetString(doc, cur_node->xmlChildrenNode, 1);
+            if (node_content) {
+                serverTime = atoi((const char *)node_content);
+                xmlFree(node_content);
+            }
+            break;  // only the first <Time> element counts
+        }
+    }
+    xmlFreeDoc(doc);
+    return serverTime;
+}
+// Downloads the update list for lastScrap and inserts the ids found in
+// <Series> and <Episode> children of the <Items> root into *updatedSeries and
+// *updatedEpisodes. Returns false when the download fails or the XML does not
+// have an <Items> root. Elements without text are skipped (previously a NULL
+// string was passed to atoi()).
+bool cTVDBScraper::GetUpdatedSeriesandEpisodes(set<int> *updatedSeries, set<int> *updatedEpisodes, int lastScrap) {
+    stringstream url;
+    // NOTE(review): the URL prefix literal is empty in this patch - confirm the endpoint.
+    url << "" << lastScrap;
+    string updatedXML;
+    if (!curl.GetUrl(url.str().c_str(), &updatedXML)) {
+        return false;
+    }
+    xmlDoc *doc = SetXMLDoc(updatedXML);
+    if (doc == NULL)
+        return false;
+    //Root Element has to be <Items>
+    xmlNode *node = xmlDocGetRootElement(doc);
+    if (!(node && !xmlStrcmp(node->name, (const xmlChar *)"Items"))) {
+        xmlFreeDoc(doc);
+        return false;
+    }
+    for (xmlNode *cur_node = node->children; cur_node; cur_node = cur_node->next) {
+        if (cur_node->type != XML_ELEMENT_NODE)
+            continue;
+        set<int> *target = NULL;
+        if (!xmlStrcmp(cur_node->name, (const xmlChar *)"Series"))
+            target = updatedSeries;
+        else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"Episode"))
+            target = updatedEpisodes;
+        if (!target)
+            continue;
+        xmlChar *node_content = xmlNodeListGetString(doc, cur_node->xmlChildrenNode, 1);
+        if (!node_content)
+            continue;
+        target->insert(atoi((const char *)node_content));
+        xmlFree(node_content);
+    }
+    xmlFreeDoc(doc);
+    return true;
+}
+// Queries GetSeries.php on the XML mirror for seriesName (URL-escaped) and
+// returns the id of the first series in the answer.
+// Returns 0 when Connect() has not provided a mirror list, escaping or the
+// download fails, or no series id is found.
+int cTVDBScraper::ReadSeries(string seriesName) {
+    int seriesID = 0;
+    // mirrors stays NULL until Connect() succeeds
+    if (!mirrors)
+        return seriesID;
+    stringstream url;
+    string seriesXML;
+    char* escUrl = curl.EscapeUrl(seriesName.c_str());
+    // a NULL char* must not be streamed into the URL
+    if (!escUrl)
+        return seriesID;
+    url << mirrors->GetMirrorXML() << "/api/GetSeries.php?seriesname=" << escUrl << "&language=" << language.c_str();
+    curl.Free(escUrl);
+    if (curl.GetUrl(url.str().c_str(), &seriesXML))
+        seriesID = ParseXML(seriesXML);
+    return seriesID;
+}
+// Returns the <seriesid> of the first <Series> element below the <Data> root,
+// or 0 when the XML cannot be parsed, has a different shape, or the first
+// series carries no non-empty <seriesid>.
+int cTVDBScraper::ParseXML(string xml) {
+    int seriesID = 0;
+    xmlDoc *doc = SetXMLDoc(xml);
+    if (doc == NULL)
+        return seriesID;
+    //Root Element has to be <Data>
+    xmlNode *root = xmlDocGetRootElement(doc);
+    if (!(root && !xmlStrcmp(root->name, (const xmlChar *)"Data"))) {
+        xmlFreeDoc(doc);
+        return seriesID;
+    }
+    //Searching for <Series>
+    xmlNode *series = NULL;
+    for (xmlNode *cur_node = root->children; cur_node; cur_node = cur_node->next) {
+        if ((cur_node->type == XML_ELEMENT_NODE) && !xmlStrcmp(cur_node->name, (const xmlChar *)"Series")) {
+            series = cur_node;
+            break;
+        }
+    }
+    if (!series) {
+        xmlFreeDoc(doc);
+        return seriesID;
+    }
+    //now read the first series
+    for (xmlNode *cur_node = series->children; cur_node; cur_node = cur_node->next) {
+        if (cur_node->type != XML_ELEMENT_NODE)
+            continue;
+        xmlChar *node_content = xmlNodeListGetString(doc, cur_node->xmlChildrenNode, 1);
+        if (!node_content)
+            continue;
+        bool isSeriesId = !xmlStrcmp(cur_node->name, (const xmlChar *)"seriesid");
+        if (isSeriesId)
+            seriesID = atoi((const char *)node_content);
+        xmlFree(node_content);
+        if (isSeriesId)
+            break;
+    }
+    xmlFreeDoc(doc);
+    return seriesID;
+}
+// Parses xml from memory and returns the document, or NULL on a parse error.
+// The caller must release the result with xmlFreeDoc().
+xmlDoc *cTVDBScraper::SetXMLDoc(string xml) {
+    return xmlReadMemory(xml.c_str(), xml.size(), "noname.xml", NULL, 0);
+}
diff --git a/scraper/thetvdbscraper/thetvdbscraper.h b/scraper/thetvdbscraper/thetvdbscraper.h
new file mode 100644
index 0000000..051842c
--- /dev/null
+++ b/scraper/thetvdbscraper/thetvdbscraper.h
@@ -0,0 +1,39 @@
+#include <iostream>
+#include <string>
+#include <sstream>
+#include <vector>
+#include <map>
+#include <set>
+#include <libxml/parser.h>
+#include <libxml/tree.h>
+#include "tvdbseries.h"
+#include "tvdbmirrors.h"
+using namespace std;
+// --- cTVDBScraper -------------------------------------------------------------
+// Front end for the TV database: Connect() once, then query series/episodes.
+class cTVDBScraper {
+    string apiKey;
+    string baseURL;
+    string language;
+    cTVDBMirrors *mirrors;   // created by Connect(), deleted in the destructor
+    xmlDoc *SetXMLDoc(string xml);
+    int ParseXML(string xml);
+    int ReadSeries(string seriesName);
+public:
+    cTVDBScraper(string language);
+    virtual ~cTVDBScraper(void);
+    bool Connect(void);
+    int GetServerTime(void);
+    // The three factory methods return newly allocated objects owned by the caller.
+    cTVDBSeries *ScrapInitial(string seriesName);
+    bool GetUpdatedSeriesandEpisodes(set<int> *updatedSeries, set<int> *updatedEpisodes, int lastScrap);
+    cTVDBSeries *GetSeries(int seriesID);
+    cTVDBEpisode *GetEpisode(int episodeID);
+};
diff --git a/scraper/thetvdbscraper/tvdbactor.c b/scraper/thetvdbscraper/tvdbactor.c
new file mode 100644
index 0000000..f64846a
--- /dev/null
+++ b/scraper/thetvdbscraper/tvdbactor.c
@@ -0,0 +1,66 @@
+#include <iostream>
+#include <string>
+#include <sstream>
+#include <vector>
+#include <libxml/parser.h>
+#include <libxml/tree.h>
+#include "tvdbactor.h"
+using namespace std;
+// Stores the language and a borrowed pointer to the mirror list (not owned).
+cTVDBActors::cTVDBActors(string language, cTVDBMirrors *mirrors) {
+    this->language = language;
+    this->mirrors = mirrors;
+}
+// Nothing to release: mirrors is not deleted here.
+cTVDBActors::~cTVDBActors() {
+}
+// Parses an <Actors> document and appends one newly allocated cTVDBActor per
+// <Actor> element to *actors; the caller owns the new objects.
+// Does nothing when the XML cannot be parsed or the root is not <Actors>.
+void cTVDBActors::ParseXML(string xml, vector<cTVDBActor*> *actors) {
+    xmlDoc *doc = xmlReadMemory(xml.c_str(), xml.size(), "noname.xml", NULL, 0);
+    if (doc == NULL)
+        return;
+    //Root Element has to be <Actors>
+    xmlNode *root = xmlDocGetRootElement(doc);
+    if (!(root && !xmlStrcmp(root->name, (const xmlChar *)"Actors"))) {
+        xmlFreeDoc(doc);
+        return;
+    }
+    //Looping through actors
+    for (xmlNode *cur_node = root->children; cur_node; cur_node = cur_node->next) {
+        if ((cur_node->type == XML_ELEMENT_NODE) && !xmlStrcmp(cur_node->name, (const xmlChar *)"Actor")) {
+            actors->push_back(ReadEntry(doc, cur_node->children));
+        }
+    }
+    xmlFreeDoc(doc);
+}
+// Builds a new cTVDBActor from the child elements of one <Actor> node
+// (Image, Name, Role, id, SortOrder). Elements without text are ignored and
+// leave the constructor default in place. The caller owns the returned object.
+cTVDBActor *cTVDBActors::ReadEntry(xmlDoc *doc, xmlNode *node) {
+    cTVDBActor *actor = new cTVDBActor();
+    for (xmlNode *child = node; child; child = child->next) {
+        if (child->type != XML_ELEMENT_NODE)
+            continue;
+        xmlChar *text = xmlNodeListGetString(doc, child->xmlChildrenNode, 1);
+        if (!text)
+            continue;
+        const char *value = (const char *)text;
+        if (!xmlStrcmp(child->name, (const xmlChar *)"Image")) {
+            // image paths are relative to the banner mirror
+            actor->thumbUrl = mirrors->GetMirrorBanner() + value;
+        } else if (!xmlStrcmp(child->name, (const xmlChar *)"Name")) {
+            actor->name = value;
+        } else if (!xmlStrcmp(child->name, (const xmlChar *)"Role")) {
+            actor->role = value;
+        } else if (!xmlStrcmp(child->name, (const xmlChar *)"id")) {
+            actor->id = atoi(value);
+        } else if (!xmlStrcmp(child->name, (const xmlChar *)"SortOrder")) {
+            actor->sortOrder = atoi(value);
+        }
+        xmlFree(text);
+    }
+    return actor;
+} \ No newline at end of file
diff --git a/scraper/thetvdbscraper/tvdbactor.h b/scraper/thetvdbscraper/tvdbactor.h
new file mode 100644
index 0000000..ed77199
--- /dev/null
+++ b/scraper/thetvdbscraper/tvdbactor.h
@@ -0,0 +1,54 @@
+#include <iostream>
+#include <string>
+#include <sstream>
+#include <vector>
+#include <libxml/parser.h>
+#include <libxml/tree.h>
+#include "tvdbmirrors.h"
+using namespace std;
+// --- cTVDBActor -------------------------------------------------------------
+// Plain record for one series actor. All members are public because
+// cTVDBActors::ReadEntry() fills them in directly.
+class cTVDBActor {
+public:
+    // Starts with empty strings, zero ids and a default thumbnail size of 300x450.
+    cTVDBActor(void) {
+        thumbUrl = "";
+        name = "";
+        role = "";
+        id = 0;
+        sortOrder = 0;
+        thumbUrlWidth = 300;
+        thumbUrlHeight = 450;
+    };
+    string thumbUrl;
+    int thumbUrlWidth;
+    int thumbUrlHeight;
+    string name;
+    string role;
+    int id;
+    int sortOrder;
+    // Prints name, id, role, thumbnail URL and sort order to stdout.
+    void Dump() {
+        cout << "Actor name: " << name << ", ID: " << id << endl;
+        cout << "Actor role: " << role << endl;
+        cout << "Actor thumb: " << thumbUrl << endl;
+        cout << "Actor SortOrder: " << sortOrder << endl;
+    }
+};
+// --- cTVDBActors --------------------------------------------------------
+// Turns an <Actors> XML document into cTVDBActor objects.
+class cTVDBActors {
+    cTVDBMirrors *mirrors;   // borrowed; used to build thumbnail URLs
+    string language;
+    cTVDBActor *ReadEntry(xmlDoc *doc, xmlNode *node);
+public:
+    cTVDBActors(string language, cTVDBMirrors *mirrors);
+    virtual ~cTVDBActors(void);
+    // Appends newly allocated actors to *actors; the caller owns them.
+    void ParseXML(string xml, vector<cTVDBActor*> *actors);
+};
diff --git a/scraper/thetvdbscraper/tvdbepisode.c b/scraper/thetvdbscraper/tvdbepisode.c
new file mode 100644
index 0000000..b9ac510
--- /dev/null
+++ b/scraper/thetvdbscraper/tvdbepisode.c
@@ -0,0 +1,147 @@
+#include <iostream>
+#include <string>
+#include <sstream>
+#include <vector>
+#include <libxml/parser.h>
+#include <libxml/tree.h>
+#include "../../lib/curl.h"
+#include "tvdbepisode.h"
+using namespace std;
+// Creates an empty episode: all ids and counters are 0, strings are empty and
+// the image size defaults to 400x225.
+cTVDBEpisode::cTVDBEpisode(void) {
+    // previously left uninitialised although ReadEpisode() dereferences it
+    mirrors = NULL;
+    id = 0;
+    seriesID = 0;
+    number = 0;
+    season = 0;
+    combinedEpisode = 0;
+    combinedSeason = 0;
+    name = "";
+    firstAired = "";
+    guestStars = "";
+    overview = "";
+    rating = 0.0;
+    imageUrl = "";
+    width = 400;
+    height = 225;
+    imgFlag = 0;
+    seasonId = 0;
+    lastUpdated = 0;
+}
+// Creates an episode with the given id that can later be filled by
+// ReadEpisode(). mirrors is borrowed, not owned. All other fields start at the
+// same defaults as the no-argument constructor.
+cTVDBEpisode::cTVDBEpisode(int ID, string language, string apiKey, cTVDBMirrors *mirrors) {
+    this->language = language;
+    this->apiKey = apiKey;
+    this->mirrors = mirrors;
+    id = ID;
+    seriesID = 0;
+    number = 0;
+    season = 0;
+    combinedEpisode = 0;
+    combinedSeason = 0;
+    name = "";
+    firstAired = "";
+    guestStars = "";
+    overview = "";
+    rating = 0.0;
+    imageUrl = "";
+    width = 400;
+    height = 225;
+    imgFlag = 0;
+    seasonId = 0;
+    lastUpdated = 0;
+}
+// Downloads /api/<apiKey>/episodes/<id>/<language>.xml from the XML mirror and
+// fills this object from it. Does nothing when no mirror list is set or the
+// download fails.
+void cTVDBEpisode::ReadEpisode(void) {
+    if (!mirrors)
+        return;
+    stringstream url;
+    url << mirrors->GetMirrorXML() << "/api/" << apiKey << "/episodes/" << id << "/" << language << ".xml";
+    string episodeXML;
+    if (curl.GetUrl(url.str().c_str(), &episodeXML)) {
+        ParseXML(episodeXML);
+    }
+}
+// Parses a <Data> document and feeds every <Episode> element into
+// ReadEpisodeFromXML(); with several episodes, later ones overwrite the fields
+// set by earlier ones. Does nothing when the XML cannot be parsed or the root
+// is not <Data>.
+void cTVDBEpisode::ParseXML(string xml) {
+    xmlDoc *doc = xmlReadMemory(xml.c_str(), xml.size(), "noname.xml", NULL, 0);
+    if (doc == NULL)
+        return;
+    //Root Element has to be <Data>
+    xmlNode *root = xmlDocGetRootElement(doc);
+    if (!(root && !xmlStrcmp(root->name, (const xmlChar *)"Data"))) {
+        xmlFreeDoc(doc);
+        return;
+    }
+    //Looping through episodes
+    for (xmlNode *cur_node = root->children; cur_node; cur_node = cur_node->next) {
+        if ((cur_node->type == XML_ELEMENT_NODE) && !xmlStrcmp(cur_node->name, (const xmlChar *)"Episode")) {
+            ReadEpisodeFromXML(doc, cur_node->children, mirrors);
+        }
+    }
+    xmlFreeDoc(doc);
+}
+// Fills this episode from the child elements of one <Episode> node.
+// node is the first child; elements without text are ignored and leave the
+// current field value in place. The mirrors parameter (which shadows the
+// member of the same name) supplies the prefix for the image URL; when it is
+// NULL the <filename> element is skipped instead of dereferencing it.
+void cTVDBEpisode::ReadEpisodeFromXML(xmlDoc *myDoc, xmlNode *node, cTVDBMirrors *mirrors) {
+    xmlNode *cur_node = NULL;
+    xmlChar *node_content;
+    for (cur_node = node; cur_node; cur_node = cur_node->next) {
+        if (cur_node->type == XML_ELEMENT_NODE) {
+            node_content = xmlNodeListGetString(myDoc, cur_node->xmlChildrenNode, 1);
+            if (!node_content)
+                continue;
+            if (!xmlStrcmp(cur_node->name, (const xmlChar *)"id")) {
+                id = atoi((const char *)node_content);
+            } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"EpisodeNumber")) {
+                number = atoi((const char *)node_content);
+            } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"seriesid")) {
+                seriesID = atoi((const char *)node_content);
+            } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"SeasonNumber")) {
+                season = atoi((const char *)node_content);
+            } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"Combined_episodenumber")) {
+                combinedEpisode = atoi((const char *)node_content);
+            } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"Combined_season")) {
+                combinedSeason = atoi((const char *)node_content);
+            } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"EpisodeName")) {
+                name = (const char *)node_content;
+            } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"FirstAired")) {
+                firstAired = (const char *)node_content;
+            } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"GuestStars")) {
+                guestStars = (const char *)node_content;
+            } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"Overview")) {
+                overview = (const char *)node_content;
+            } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"Rating")) {
+                rating = atof((const char *)node_content);
+            } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"filename")) {
+                if (mirrors)
+                    imageUrl = mirrors->GetMirrorBanner() + (const char *)node_content;
+            } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"thumb_width")) {
+                width = atoi((const char *)node_content);
+            } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"thumb_height")) {
+                height = atoi((const char *)node_content);
+            } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"EpImgFlag")) {
+                imgFlag = atoi((const char *)node_content);
+            } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"seasonid")) {
+                seasonId = atoi((const char *)node_content);
+            } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"lastupdated")) {
+                lastUpdated = atoi((const char *)node_content);
+            }
+            xmlFree(node_content);
+        }
+    }
+}
+// Prints all episode fields to stdout.
+void cTVDBEpisode::Dump() {
+    cout << "----------------------------------------" << endl;
+    cout << "Season " << season << ", Episode " << number << ", Name: " << name << ", ID: " << id << ", SeasonID " << seasonId << endl;
+    cout << "combinedSeason: " << combinedSeason << ", combinedEpisode: " << combinedEpisode << endl;
+    cout << "First Aired: " << firstAired << endl;
+    cout << "Guest Stars: " << guestStars << endl;
+    cout << "Overview: " << overview << endl;
+    cout << "Rating: " << rating << endl;
+    cout << "imageUrl: " << imageUrl << ", Size: " << width << " x " << height << ", Flag: " << imgFlag << endl;
+    cout << "Last Update: " << lastUpdated << endl;
+}
diff --git a/scraper/thetvdbscraper/tvdbepisode.h b/scraper/thetvdbscraper/tvdbepisode.h
new file mode 100644
index 0000000..945278b
--- /dev/null
+++ b/scraper/thetvdbscraper/tvdbepisode.h
@@ -0,0 +1,45 @@
+#include <iostream>
+#include <string>
+#include <sstream>
+#include <vector>
+#include <libxml/parser.h>
+#include <libxml/tree.h>
+#include "tvdbmirrors.h"
+using namespace std;
+// --- cTVDBEpisode -------------------------------------------------------------
+// One episode: either filled from a series document via ReadEpisodeFromXML()
+// or downloaded on its own via ReadEpisode().
+class cTVDBEpisode {
+    string apiKey;
+    cTVDBMirrors *mirrors;   // borrowed, not owned
+    string language;
+    void ParseXML(string xml);
+public:
+    // Public from here on: cTVDBScraper::GetEpisode() constructs the object.
+    cTVDBEpisode(void);
+    cTVDBEpisode(int ID, string language, string apiKey, cTVDBMirrors *mirrors);
+    int id;
+    int seriesID;
+    int number;
+    int season;
+    int combinedSeason;
+    int combinedEpisode;
+    string name;
+    string firstAired;
+    string guestStars;
+    string overview;
+    float rating;
+    string imageUrl;
+    int width;
+    int height;
+    int imgFlag;
+    int seasonId;
+    int lastUpdated;
+    void ReadEpisode(void);
+    void ReadEpisodeFromXML(xmlDoc *myDoc, xmlNode *node, cTVDBMirrors *mirrors);
+    void Dump();
+};
+#endif //cTVDBEpisode
diff --git a/scraper/thetvdbscraper/tvdbmedia.c b/scraper/thetvdbscraper/tvdbmedia.c
new file mode 100644
index 0000000..7495f3b
--- /dev/null
+++ b/scraper/thetvdbscraper/tvdbmedia.c
@@ -0,0 +1,202 @@
+#include "tvdbmedia.h"
+using namespace std;
+// Remembers the requested language and the mirror list; the fallback language
+// is always "en".
+cTVDBSeriesMedia::cTVDBSeriesMedia(string language, cTVDBMirrors *mirrors)
+    : language(language),
+      fallbackLanguage("en"),
+      mirrors(mirrors) {
+}
+// Nothing to clean up here.
+cTVDBSeriesMedia::~cTVDBSeriesMedia() {
+}
+// Parses a banners XML document and passes every <Banner> element to ReadEntry().
+// The four vectors are remembered as the destinations the Read* helpers append to.
+// Documents that cannot be parsed or whose root is not <Banners> are ignored.
+void cTVDBSeriesMedia::ParseXML(string xml, vector<cTVDBBanner*> *banners, vector<cTVDBFanart*> *fanarts, vector<cTVDBPoster*> *posters, vector<cTVDBSeasonPoster*> *seasonPosters) {
+    this->banners = banners;
+    this->fanarts = fanarts;
+    this->posters = posters;
+    this->seasonPosters = seasonPosters;
+    xmlDoc *doc = xmlReadMemory(xml.c_str(), xml.size(), "noname.xml", NULL, 0);
+    if (!doc)
+        return;
+    //Root Element has to be <Banners>
+    xmlNode *root = xmlDocGetRootElement(doc);
+    const bool isBanners = root && (xmlStrcmp(root->name, (const xmlChar *)"Banners") == 0);
+    if (isBanners) {
+        //Looping through banners
+        for (xmlNode *entry = root->children; entry != NULL; entry = entry->next) {
+            if (entry->type != XML_ELEMENT_NODE)
+                continue;
+            if (xmlStrcmp(entry->name, (const xmlChar *)"Banner") != 0)
+                continue;
+            ReadEntry(doc, entry->children);
+        }
+    }
+    xmlFreeDoc(doc);
+}
+// Looks for the <BannerType> child of one <Banner> element and hands the same
+// child list to the reader matching that type (poster, fanart, series, season).
+// Other type values are ignored.
+void cTVDBSeriesMedia::ReadEntry(xmlDoc *doc, xmlNode *node) {
+    for (xmlNode *child = node; child != NULL; child = child->next) {
+        if (child->type != XML_ELEMENT_NODE)
+            continue;
+        xmlChar *text = xmlNodeListGetString(doc, child->xmlChildrenNode, 1);
+        if (text == NULL)
+            continue;
+        if (xmlStrcmp(child->name, (const xmlChar *)"BannerType") == 0) {
+            if (xmlStrcmp(text, (const xmlChar *)"poster") == 0) {
+                ReadPoster(doc, node);
+            } else if (xmlStrcmp(text, (const xmlChar *)"fanart") == 0) {
+                ReadFanart(doc, node);
+            } else if (xmlStrcmp(text, (const xmlChar *)"series") == 0) {
+                ReadBanner(doc, node);
+            } else if (xmlStrcmp(text, (const xmlChar *)"season") == 0) {
+                ReadSeasonPoster(doc, node);
+            }
+        }
+        xmlFree(text);
+    }
+}
+// Builds a cTVDBFanart from the children of one <Banner> element and appends it
+// to the fanart list. Entries without a URL, or in a language that is neither
+// the requested nor the fallback language, are discarded.
+void cTVDBSeriesMedia::ReadFanart(xmlDoc *doc, xmlNode *node) {
+    xmlNode *cur_node = NULL;
+    xmlChar *node_content;
+    cTVDBFanart *fanart = new cTVDBFanart();
+    //default size, kept unless <BannerType2> carries a "<width>x<height>" value
+    fanart->width = 1920;
+    fanart->height = 1080;
+    for (cur_node = node; cur_node; cur_node = cur_node->next) {
+        if (cur_node->type == XML_ELEMENT_NODE) {
+            node_content = xmlNodeListGetString(doc, cur_node->xmlChildrenNode, 1);
+            if (!node_content)
+                continue;
+            if (!xmlStrcmp(cur_node->name, (const xmlChar *)"BannerPath")) {
+                fanart->url = mirrors->GetMirrorBanner() + (const char *)node_content;
+            } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"Language")) {
+                fanart->language = (const char *)node_content;
+            } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"Rating")) {
+                fanart->rating = atof((const char *)node_content);
+            } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"ThumbnailPath")) {
+                fanart->thumbUrl = mirrors->GetMirrorBanner() + (const char *)node_content;
+            } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"BannerType2")) {
+                string fanartSize = (const char *)node_content;
+                size_t posX = fanartSize.find("x");
+                // Without an 'x' separator there is no size to parse; keep the
+                // defaults instead of overwriting both dimensions with atoi() of
+                // the whole string.
+                if (posX != string::npos) {
+                    fanart->width = atoi(fanartSize.substr(0, posX).c_str());
+                    fanart->height = atoi(fanartSize.substr(posX+1).c_str());
+                }
+            }
+            xmlFree(node_content);
+        }
+    }
+    // NOTE(review): the language test was reconstructed from a garbled line in
+    // this view of the patch - confirm against the upstream source.
+    const bool wrongLanguage = (fanart->language != language) && (fanart->language != fallbackLanguage);
+    if ((fanart->url.size() == 0) || wrongLanguage) {
+        delete fanart;
+        return;
+    }
+    fanarts->push_back(fanart);
+}
+// Builds a cTVDBPoster from the children of one <Banner> element and appends it
+// to the poster list. Entries without a URL, or in a language that is neither
+// the requested nor the fallback language, are discarded.
+void cTVDBSeriesMedia::ReadPoster(xmlDoc *doc, xmlNode *node) {
+    xmlNode *cur_node = NULL;
+    xmlChar *node_content;
+    cTVDBPoster *poster = new cTVDBPoster();
+    //default size, kept unless <BannerType2> carries a "<width>x<height>" value
+    poster->width = 400;
+    poster->height = 578;
+    for (cur_node = node; cur_node; cur_node = cur_node->next) {
+        if (cur_node->type == XML_ELEMENT_NODE) {
+            node_content = xmlNodeListGetString(doc, cur_node->xmlChildrenNode, 1);
+            if (!node_content)
+                continue;
+            if (!xmlStrcmp(cur_node->name, (const xmlChar *)"BannerPath")) {
+                poster->url = mirrors->GetMirrorBanner() + (const char *)node_content;
+            } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"Language")) {
+                poster->language = (const char *)node_content;
+            } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"Rating")) {
+                poster->rating = atof((const char *)node_content);
+            } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"BannerType2")) {
+                string posterSize = (const char *)node_content;
+                size_t posX = posterSize.find("x");
+                // Without an 'x' separator there is no size to parse; keep the
+                // defaults instead of overwriting both dimensions with atoi() of
+                // the whole string.
+                if (posX != string::npos) {
+                    poster->width = atoi(posterSize.substr(0, posX).c_str());
+                    poster->height = atoi(posterSize.substr(posX+1).c_str());
+                }
+            }
+            xmlFree(node_content);
+        }
+    }
+    // NOTE(review): the language test was reconstructed from a garbled line in
+    // this view of the patch - confirm against the upstream source.
+    const bool wrongLanguage = (poster->language != language) && (poster->language != fallbackLanguage);
+    if ((poster->url.size() == 0) || wrongLanguage) {
+        delete poster;
+        return;
+    }
+    posters->push_back(poster);
+}
+// Builds a cTVDBSeasonPoster from the children of one <Banner> element and
+// appends it to the season poster list. Discarded are entries without a URL,
+// with season 0, of sub-type "seasonwide", or in a language that is neither the
+// requested nor the fallback language.
+void cTVDBSeriesMedia::ReadSeasonPoster(xmlDoc *doc, xmlNode *node) {
+    xmlNode *cur_node = NULL;
+    xmlChar *node_content;
+    cTVDBSeasonPoster *poster = new cTVDBSeasonPoster();
+    poster->width = 400;
+    poster->height = 578;
+    bool add = true;
+    for (cur_node = node; cur_node; cur_node = cur_node->next) {
+        if (cur_node->type == XML_ELEMENT_NODE) {
+            node_content = xmlNodeListGetString(doc, cur_node->xmlChildrenNode, 1);
+            if (!node_content)
+                continue;
+            if (!xmlStrcmp(cur_node->name, (const xmlChar *)"BannerPath")) {
+                poster->url = mirrors->GetMirrorBanner() + (const char *)node_content;
+            } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"Language")) {
+                poster->language = (const char *)node_content;
+            } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"Rating")) {
+                poster->rating = atof((const char *)node_content);
+            } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"Season")) {
+                poster->season = atoi((const char *)node_content);
+            } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"BannerType2")) {
+                string bt2 = (const char *)node_content;
+                // NOTE(review): comparison reconstructed from a garbled line in
+                // this view of the patch - confirm against the upstream source.
+                if (bt2 == "seasonwide")
+                    add = false;
+            }
+            xmlFree(node_content);
+        }
+    }
+    // NOTE(review): the language test was reconstructed from a garbled line in
+    // this view of the patch - confirm against the upstream source.
+    const bool wrongLanguage = (poster->language != language) && (poster->language != fallbackLanguage);
+    if ((poster->url.size() == 0) ||
+        (poster->season == 0) ||
+        (!add) ||
+        wrongLanguage) {
+        delete poster;
+        return;
+    }
+    seasonPosters->push_back(poster);
+}
+// Builds a cTVDBBanner (fixed size 758 x 140) from the children of one <Banner>
+// element and appends it to the banner list. Entries without a URL, or in a
+// language that is neither the requested nor the fallback language, are discarded.
+void cTVDBSeriesMedia::ReadBanner(xmlDoc *doc, xmlNode *node) {
+    xmlNode *cur_node = NULL;
+    xmlChar *node_content;
+    cTVDBBanner *banner = new cTVDBBanner();
+    banner->width = 758;
+    banner->height = 140;
+    for (cur_node = node; cur_node; cur_node = cur_node->next) {
+        if (cur_node->type == XML_ELEMENT_NODE) {
+            node_content = xmlNodeListGetString(doc, cur_node->xmlChildrenNode, 1);
+            if (!node_content)
+                continue;
+            if (!xmlStrcmp(cur_node->name, (const xmlChar *)"BannerPath")) {
+                banner->url = mirrors->GetMirrorBanner() + (const char *)node_content;
+            } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"Language")) {
+                banner->language = (const char *)node_content;
+            } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"Rating")) {
+                banner->rating = atof((const char *)node_content);
+            }
+            xmlFree(node_content);
+        }
+    }
+    // NOTE(review): the language test was reconstructed from a garbled line in
+    // this view of the patch - confirm against the upstream source.
+    const bool wrongLanguage = (banner->language != language) && (banner->language != fallbackLanguage);
+    if ((banner->url.size() == 0) || wrongLanguage) {
+        delete banner;
+        return;
+    }
+    banners->push_back(banner);
+}
diff --git a/scraper/thetvdbscraper/tvdbmedia.h b/scraper/thetvdbscraper/tvdbmedia.h
new file mode 100644
index 0000000..4277c39
--- /dev/null
+++ b/scraper/thetvdbscraper/tvdbmedia.h
@@ -0,0 +1,111 @@
+#include <iostream>
+#include <string>
+#include <sstream>
+#include <vector>
+#include <libxml/parser.h>
+#include <libxml/tree.h>
+#include "tvdbmirrors.h"
+using namespace std;
+// --- cTVDBMedia -------------------------------------------------------------
+// Base record for the artwork types below (fanart, poster, season poster, banner).
+// NOTE(review): no access specifiers and no closing `};` are visible in this view
+// of the patch - presumably dropped during extraction; confirm against the header.
+class cTVDBMedia {
+ // Starts with empty strings, a 0 x 0 size and a rating of 0.0.
+ cTVDBMedia(void) {
+ url = "";
+ language = "";
+ width = 0;
+ height = 0;
+ rating = 0.0;
+ };
+ string url;
+ string language;
+ int width, height;
+ double rating;
+// --- cTVDBFanart -------------------------------------------------------------
+// Fanart record: the cTVDBMedia fields plus a thumbnail URL.
+class cTVDBFanart : public cTVDBMedia {
+ cTVDBFanart(void) {
+ thumbUrl = "";
+ };
+ string thumbUrl;
+ // Prints all fields of this record to stdout.
+ void Dump() {
+ cout << "Url: " << url << endl;
+ cout << "Url Thumbnail: " << thumbUrl << endl;
+ cout << "Language: " << language << endl;
+ cout << "Size: " << width << " x " << height << endl;
+ cout << "Rating: " << rating << endl;
+ };
+// --- cTVDBPoster -------------------------------------------------------------
+// Poster record: adds no fields of its own to cTVDBMedia.
+class cTVDBPoster : public cTVDBMedia {
+ cTVDBPoster(void) {
+ };
+ // Prints all fields of this record to stdout.
+ void Dump() {
+ cout << "Url: " << url << endl;
+ cout << "Language: " << language << endl;
+ cout << "Size: " << width << " x " << height << endl;
+ cout << "Rating: " << rating << endl;
+ };
+// --- cTVDBSeasonPoster -------------------------------------------------------------
+// Season poster record: the cTVDBMedia fields plus the season number.
+class cTVDBSeasonPoster : public cTVDBMedia {
+ // Season starts at 0; cTVDBSeriesMedia::ReadSeasonPoster discards records
+ // whose season is still 0 after parsing.
+ cTVDBSeasonPoster(void) {
+ season = 0;
+ };
+ int season;
+ // Prints all fields of this record to stdout.
+ void Dump() {
+ cout << "Url: " << url << endl;
+ cout << "Season: " << season << endl;
+ cout << "Language: " << language << endl;
+ cout << "Size: " << width << " x " << height << endl;
+ cout << "Rating: " << rating << endl;
+ };
+// --- cTVDBBanner -------------------------------------------------------------
+// Series banner record based on cTVDBMedia.
+class cTVDBBanner : public cTVDBMedia {
+ cTVDBBanner(void) {
+ };
+ // NOTE(review): `season` is not assigned by the constructor above, and
+ // Dump() below does not print it - confirm whether it is used at all.
+ int season;
+ // Prints url, language, size and rating to stdout.
+ void Dump() {
+ cout << "Url: " << url << endl;
+ cout << "Language: " << language << endl;
+ cout << "Size: " << width << " x " << height << endl;
+ cout << "Rating: " << rating << endl;
+ };
+// --- cTVDBSeriesMedia --------------------------------------------------------
+// Parser for a banners XML document; sorts the entries into banner, fanart,
+// poster and season poster lists supplied by the caller of ParseXML().
+// NOTE(review): no access specifiers and no closing `};` are visible in this view
+// of the patch - presumably dropped during extraction; confirm against the header.
+class cTVDBSeriesMedia {
+ // Requested language and the fallback language (set to "en" by the constructor).
+ string language;
+ string fallbackLanguage;
+ cTVDBMirrors *mirrors;
+ // Destination vectors; assigned from the ParseXML() arguments on every call.
+ vector<cTVDBBanner*> *banners;
+ vector<cTVDBFanart*> *fanarts;
+ vector<cTVDBPoster*> *posters;
+ vector<cTVDBSeasonPoster*> *seasonPosters;
+ // ReadEntry() dispatches on <BannerType> to one of the four readers below.
+ void ReadEntry(xmlDoc *doc, xmlNode *node);
+ void ReadFanart(xmlDoc *doc, xmlNode *node);
+ void ReadPoster(xmlDoc *doc, xmlNode *node);
+ void ReadBanner(xmlDoc *doc, xmlNode *node);
+ void ReadSeasonPoster(xmlDoc *doc, xmlNode *node);
+ cTVDBSeriesMedia(string language, cTVDBMirrors *mirrors);
+ virtual ~cTVDBSeriesMedia(void);
+ void ParseXML(string xml, vector<cTVDBBanner*> *banners, vector<cTVDBFanart*> *fanarts, vector<cTVDBPoster*> *posters, vector<cTVDBSeasonPoster*> *seasonPosters);
diff --git a/scraper/thetvdbscraper/tvdbmirrors.c b/scraper/thetvdbscraper/tvdbmirrors.c
new file mode 100644
index 0000000..2c063ec
--- /dev/null
+++ b/scraper/thetvdbscraper/tvdbmirrors.c
@@ -0,0 +1,88 @@
+#include "tvdbmirrors.h"
+using namespace std;
+// Constructor and destructor: no statements are visible for either in this view
+// of the patch (their closing braces are missing as well).
+cTVDBMirrors::cTVDBMirrors(void) {
+cTVDBMirrors::~cTVDBMirrors() {
+// Returns one of the registered XML mirrors, chosen with rand();
+// "" when no XML mirror is registered.
+string cTVDBMirrors::GetMirrorXML(void) {
+    const size_t count = xmlmirrors.size();
+    if (count == 0) {
+        return "";
+    }
+    const int pick = rand() % count;
+    return xmlmirrors[pick];
+}
+// Returns one of the registered banner mirrors, chosen with rand(), with
+// "/banners/" appended; "" when no banner mirror is registered.
+string cTVDBMirrors::GetMirrorBanner(void) {
+    const size_t count = bannermirrors.size();
+    if (count == 0) {
+        return "";
+    }
+    const int pick = rand() % count;
+    string base = bannermirrors[pick];
+    return base + "/banners/";
+}
+// Returns one of the registered zip mirrors, chosen with rand();
+// "" when no zip mirror is registered.
+string cTVDBMirrors::GetMirrorZip(void) {
+    const size_t count = zipmirrors.size();
+    if (count == 0) {
+        return "";
+    }
+    const int pick = rand() % count;
+    return zipmirrors[pick];
+}
+// Parses a mirrors XML document (root element <Mirrors>) and registers every
+// <Mirror> entry through ReadEntry().
+// Returns true when at least one entry was accepted; false when the document
+// cannot be parsed, has a different root element, or no entry was accepted.
+bool cTVDBMirrors::ParseXML(string xml) {
+    xmlDoc *doc = xmlReadMemory(xml.c_str(), xml.size(), "noname.xml", NULL, 0);
+    if (doc == NULL)
+        return false;
+    //Root Element has to be <Mirrors>
+    xmlNode *node = xmlDocGetRootElement(doc);
+    if (!(node && !xmlStrcmp(node->name, (const xmlChar *)"Mirrors"))) {
+        xmlFreeDoc(doc);
+        return false;
+    }
+    //Loop through <Mirror>
+    bool ok = false;
+    for (xmlNode *cur_node = node->children; cur_node; cur_node = cur_node->next) {
+        if ((cur_node->type == XML_ELEMENT_NODE) && !xmlStrcmp(cur_node->name, (const xmlChar *)"Mirror")) {
+            // Accumulate rather than overwrite: a rejected entry (for example
+            // the last one) must not hide mirrors that were already registered.
+            if (ReadEntry(doc, cur_node->children))
+                ok = true;
+        }
+    }
+    xmlFreeDoc(doc);
+    return ok;
+}
+// Reads <mirrorpath> and <typemask> from the children of one <Mirror> element
+// and passes them to CreateMirror(), whose result is returned. A missing
+// element leaves the empty path / mask 0 in place.
+bool cTVDBMirrors::ReadEntry(xmlDoc *doc, xmlNode *node) {
+    string mirrorPath = "";
+    int mask = 0;
+    for (xmlNode *child = node; child != NULL; child = child->next) {
+        if (child->type != XML_ELEMENT_NODE)
+            continue;
+        xmlChar *text = xmlNodeListGetString(doc, child->xmlChildrenNode, 1);
+        if (text == NULL)
+            continue;
+        if (xmlStrcmp(child->name, (const xmlChar *)"mirrorpath") == 0) {
+            mirrorPath = (const char *)text;
+        } else if (xmlStrcmp(child->name, (const xmlChar *)"typemask") == 0) {
+            mask = atoi((const char *)text);
+        }
+        xmlFree(text);
+    }
+    return CreateMirror(mirrorPath, mask);
+}
+// Adds `path` to each mirror list selected by the bits of `typemask`
+// (bit 1 -> xml, bit 2 -> banner, bit 4 -> zip).
+// Returns false, changing nothing, for an empty path or a mask outside 1..7.
+bool cTVDBMirrors::CreateMirror(string path, int typemask) {
+    const bool pathGiven = !(path.size() < 1);
+    const bool maskInRange = (typemask >= 1) && (typemask <= 7);
+    if (!pathGiven || !maskInRange) {
+        return false;
+    }
+    if ((typemask & 1) != 0) {
+        xmlmirrors.push_back(path);
+    }
+    if ((typemask & 2) != 0) {
+        bannermirrors.push_back(path);
+    }
+    if ((typemask & 4) != 0) {
+        zipmirrors.push_back(path);
+    }
+    return true;
+}
\ No newline at end of file
diff --git a/scraper/thetvdbscraper/tvdbmirrors.h b/scraper/thetvdbscraper/tvdbmirrors.h
new file mode 100644
index 0000000..cf7dc62
--- /dev/null
+++ b/scraper/thetvdbscraper/tvdbmirrors.h
@@ -0,0 +1,32 @@
+#include <iostream>
+#include <string>
+#include <sstream>
+#include <vector>
+#include <libxml/parser.h>
+#include <libxml/tree.h>
+using namespace std;
+// --- cTVDBMirrors -------------------------------------------------------------
+// Holds the mirror lists read from a mirrors XML document and hands out one
+// mirror per request.
+// NOTE(review): no access specifiers and no closing `};` are visible in this view
+// of the patch - presumably dropped during extraction; confirm against the header.
+class cTVDBMirrors {
+ // One list per mirror type; filled by CreateMirror() according to the typemask.
+ vector<string> xmlmirrors;
+ vector<string> bannermirrors;
+ vector<string> zipmirrors;
+ bool ReadEntry(xmlDoc *doc, xmlNode *node);
+ bool CreateMirror(string path, int typemask);
+ cTVDBMirrors(void);
+ virtual ~cTVDBMirrors(void);
+ bool ParseXML(string xml);
+ // Each getter returns "" when its list is empty; otherwise an entry picked with rand().
+ string GetMirrorXML(void);
+ string GetMirrorBanner(void);
+ string GetMirrorZip(void);
diff --git a/scraper/thetvdbscraper/tvdbseries.c b/scraper/thetvdbscraper/tvdbseries.c
new file mode 100644
index 0000000..21dc78b
--- /dev/null
+++ b/scraper/thetvdbscraper/tvdbseries.c
@@ -0,0 +1,281 @@
+#include "../../lib/curl.h"
+#include "tvdbseries.h"
+using namespace std;
+// Stores the request parameters (series id, language, API key, mirror list) and
+// resets every data field and iteration cursor. Nothing is fetched here; see
+// ReadSeries(), ReadMedia() and ReadActors().
+cTVDBSeries::cTVDBSeries(int ID, string language, string apiKey, cTVDBMirrors *mirrors) {
+    this->language = language;
+    this->apiKey = apiKey;
+    this->mirrors = mirrors;
+    seriesID = ID;
+    // ReadSeriesData() assigns lastUpdated only when the XML carries a
+    // <lastupdated> element, so give it a defined value for Dump() and callers.
+    lastUpdated = 0;
+    name = "";
+    banner = "";
+    fanart = "";
+    poster = "";
+    overview = "";
+    firstAired = "";
+    network = "";
+    imbdid = "";
+    genre = "";
+    rating = 0.0;
+    status = "";
+    currentEpisode = 0;
+    currentActor = 0;
+    currentFanart = 0;
+    currentPoster = 0;
+    currentSeasonPoster = 0;
+    currentBanner = 0;
+}
+// Deletes every object held in the six collections and empties them.
+cTVDBSeries::~cTVDBSeries() {
+    for (size_t i = 0; i < episodes.size(); ++i)
+        delete episodes[i];
+    episodes.clear();
+    for (size_t i = 0; i < actors.size(); ++i)
+        delete actors[i];
+    actors.clear();
+    for (size_t i = 0; i < fanarts.size(); ++i)
+        delete fanarts[i];
+    fanarts.clear();
+    for (size_t i = 0; i < posters.size(); ++i)
+        delete posters[i];
+    posters.clear();
+    for (size_t i = 0; i < seasonPosters.size(); ++i)
+        delete seasonPosters[i];
+    seasonPosters.clear();
+    for (size_t i = 0; i < banners.size(); ++i)
+        delete banners[i];
+    banners.clear();
+}
+// Downloads the "all" XML for this series in the configured language and parses
+// it. Returns false when the download fails, true otherwise.
+bool cTVDBSeries::ReadSeries(void) {
+    stringstream request;
+    request << mirrors->GetMirrorXML() << "/api/" << apiKey << "/series/" << seriesID << "/all/" << language << ".xml";
+    string seriesXML;
+    const bool fetched = curl.GetUrl(request.str().c_str(), &seriesXML);
+    if (!fetched)
+        return false;
+    ParseXML(seriesXML);
+    return true;
+}
+// Downloads banners.xml for this series and fills the banner, fanart, poster
+// and season poster lists from it. A failed download leaves the lists untouched.
+void cTVDBSeries::ReadMedia(void) {
+    stringstream request;
+    request << mirrors->GetMirrorXML() << "/api/" << apiKey << "/series/" << seriesID << "/banners.xml";
+    string bannersXML;
+    if (!curl.GetUrl(request.str().c_str(), &bannersXML))
+        return;
+    cTVDBSeriesMedia mediaParser(language, mirrors);
+    mediaParser.ParseXML(bannersXML, &banners, &fanarts, &posters, &seasonPosters);
+}
+// Downloads actors.xml for this series and fills the actor list from it.
+// A failed download leaves the list untouched.
+void cTVDBSeries::ReadActors(void) {
+    stringstream request;
+    request << mirrors->GetMirrorXML() << "/api/" << apiKey << "/series/" << seriesID << "/actors.xml";
+    string actorsXML;
+    if (!curl.GetUrl(request.str().c_str(), &actorsXML))
+        return;
+    cTVDBActors actorParser(language, mirrors);
+    actorParser.ParseXML(actorsXML, &actors);
+}
+// Parses a series XML document: the <Series> element fills the series fields and
+// every <Episode> element becomes a new entry in the episode list. Documents that
+// cannot be parsed or whose root is not <Data> are ignored.
+void cTVDBSeries::ParseXML(string xml) {
+    xmlDoc *doc = xmlReadMemory(xml.c_str(), xml.size(), "noname.xml", NULL, 0);
+    if (!doc)
+        return;
+    //Root Element has to be <Data>
+    xmlNode *root = xmlDocGetRootElement(doc);
+    const bool isData = root && (xmlStrcmp(root->name, (const xmlChar *)"Data") == 0);
+    if (isData) {
+        //Looping through the series entry and the episodes
+        for (xmlNode *child = root->children; child != NULL; child = child->next) {
+            if (child->type != XML_ELEMENT_NODE)
+                continue;
+            if (xmlStrcmp(child->name, (const xmlChar *)"Series") == 0) {
+                ReadSeriesData(doc, child->children);
+            } else if (xmlStrcmp(child->name, (const xmlChar *)"Episode") == 0) {
+                cTVDBEpisode *episode = new cTVDBEpisode();
+                episode->ReadEpisodeFromXML(doc, child->children, mirrors);
+                episodes.push_back(episode);
+            }
+        }
+    }
+    xmlFreeDoc(doc);
+}
+// Copies the series-level fields out of the children of a <Series> element.
+// Elements without text content and elements with other names are skipped;
+// banner, fanart and poster paths are prefixed with a banner mirror URL.
+void cTVDBSeries::ReadSeriesData(xmlDoc *doc, xmlNode *node) {
+    for (xmlNode *child = node; child != NULL; child = child->next) {
+        if (child->type != XML_ELEMENT_NODE)
+            continue;
+        xmlChar *raw = xmlNodeListGetString(doc, child->xmlChildrenNode, 1);
+        if (raw == NULL)
+            continue;
+        const xmlChar *tag = child->name;
+        const char *text = (const char *)raw;
+        if (xmlStrcmp(tag, (const xmlChar *)"FirstAired") == 0) {
+            firstAired = text;
+        } else if (xmlStrcmp(tag, (const xmlChar *)"Genre") == 0) {
+            genre = text;
+        } else if (xmlStrcmp(tag, (const xmlChar *)"IMDB_ID") == 0) {
+            imbdid = text;
+        } else if (xmlStrcmp(tag, (const xmlChar *)"Network") == 0) {
+            network = text;
+        } else if (xmlStrcmp(tag, (const xmlChar *)"Overview") == 0) {
+            overview = text;
+        } else if (xmlStrcmp(tag, (const xmlChar *)"Rating") == 0) {
+            rating = atof(text);
+        } else if (xmlStrcmp(tag, (const xmlChar *)"SeriesName") == 0) {
+            name = text;
+        } else if (xmlStrcmp(tag, (const xmlChar *)"Status") == 0) {
+            status = text;
+        } else if (xmlStrcmp(tag, (const xmlChar *)"banner") == 0) {
+            banner = mirrors->GetMirrorBanner() + text;
+        } else if (xmlStrcmp(tag, (const xmlChar *)"fanart") == 0) {
+            fanart = mirrors->GetMirrorBanner() + text;
+        } else if (xmlStrcmp(tag, (const xmlChar *)"poster") == 0) {
+            poster = mirrors->GetMirrorBanner() + text;
+        } else if (xmlStrcmp(tag, (const xmlChar *)"lastupdated") == 0) {
+            lastUpdated = atoi(text);
+        }
+        xmlFree(raw);
+    }
+}
+// Returns the episode at the current cursor and advances the cursor;
+// NULL when there are no episodes or all of them have been returned.
+cTVDBEpisode *cTVDBSeries::GetEpisode(void) {
+    const int total = episodes.size();
+    const bool exhausted = (total < 1) || (currentEpisode >= total);
+    if (exhausted) {
+        return NULL;
+    }
+    return episodes[currentEpisode++];
+}
+// Returns the actor at the current cursor and advances the cursor;
+// NULL when there are no actors or all of them have been returned.
+cTVDBActor *cTVDBSeries::GetActor(void) {
+    const int total = actors.size();
+    const bool exhausted = (total < 1) || (currentActor >= total);
+    if (exhausted) {
+        return NULL;
+    }
+    return actors[currentActor++];
+}
+// Returns the fanart at the current cursor and advances the cursor;
+// NULL when there is no fanart or all of it has been returned.
+cTVDBFanart *cTVDBSeries::GetFanart(void) {
+    const int total = fanarts.size();
+    const bool exhausted = (total < 1) || (currentFanart >= total);
+    if (exhausted) {
+        return NULL;
+    }
+    return fanarts[currentFanart++];
+}
+// Returns the poster at the current cursor and advances the cursor;
+// NULL when there are no posters or all of them have been returned.
+cTVDBPoster *cTVDBSeries::GetPoster(void) {
+    const int total = posters.size();
+    const bool exhausted = (total < 1) || (currentPoster >= total);
+    if (exhausted) {
+        return NULL;
+    }
+    return posters[currentPoster++];
+}
+// Returns the season poster at the current cursor and advances the cursor;
+// NULL when there are no season posters or all of them have been returned.
+cTVDBSeasonPoster *cTVDBSeries::GetSeasonPoster(void) {
+    const int total = seasonPosters.size();
+    const bool exhausted = (total < 1) || (currentSeasonPoster >= total);
+    if (exhausted) {
+        return NULL;
+    }
+    return seasonPosters[currentSeasonPoster++];
+}
+// Returns the first season poster stored for `season`, or NULL when none matches.
+// Does not touch the iteration cursor.
+cTVDBSeasonPoster *cTVDBSeries::GetSeasonPoster(int season) {
+    const int total = seasonPosters.size();
+    int idx = 0;
+    while (idx < total) {
+        cTVDBSeasonPoster *candidate = seasonPosters[idx];
+        if (candidate->season == season) {
+            return candidate;
+        }
+        ++idx;
+    }
+    return NULL;
+}
+// Returns the banner at the current cursor and advances the cursor;
+// NULL when there are no banners or all of them have been returned.
+cTVDBBanner *cTVDBSeries::GetBanner(void) {
+    const int total = banners.size();
+    const bool exhausted = (total < 1) || (currentBanner >= total);
+    if (exhausted) {
+        return NULL;
+    }
+    return banners[currentBanner++];
+}
+// Looks up the episode with id `episodeId`. On a match, writes its season and
+// episode number to `season` / `part` and returns true; otherwise returns false
+// and leaves both outputs untouched.
+bool cTVDBSeries::GetPartAndSeason(int episodeId, int &season, int &part) {
+    for (size_t i = 0; i < episodes.size(); ++i) {
+        cTVDBEpisode *candidate = episodes[i];
+        if (candidate->id != episodeId) {
+            continue;
+        }
+        season = candidate->season;
+        part = candidate->number;
+        return true;
+    }
+    return false;
+}
+// Debug helper: prints the series fields to stdout, followed by each collection
+// (episodes, actors, fanarts, posters, season posters, banners). Every section
+// header shows the full element count; a positive `maxEntries` caps how many
+// elements of each section are dumped, any other value dumps all of them.
+void cTVDBSeries::Dump(int maxEntries) {
+    cout << "--------------------------- Series Info ----------------------------------" << endl;
+    cout << "series " << name << ", ID: " << seriesID <<endl;
+    cout << "Overview: " << overview << endl;
+    cout << "Banner: " << banner << endl;
+    cout << "Poster: " << poster << endl;
+    cout << "Fanart: " << fanart << endl;
+    cout << "imdb: " << imbdid << endl;
+    cout << "FirstAired: " << firstAired << endl;
+    cout << "Network: " << network << endl;
+    cout << "Status: " << status << endl;
+    cout << "lastUpdated: " << lastUpdated << endl;
+    cout << "Genre: " << genre << endl;
+    cout << "Rating: " << rating << endl;
+    // For each section: `shown` is the element count, cut to maxEntries when
+    // maxEntries is positive and smaller than the count.
+    const int numEpisodes = episodes.size();
+    cout << "--------------------------- " << numEpisodes << " episodes ----------------------------------" << endl;
+    int shown = (maxEntries > 0 && maxEntries < numEpisodes) ? maxEntries : numEpisodes;
+    for (int i = 0; i < shown; ++i)
+        episodes[i]->Dump();
+    const int numActors = actors.size();
+    cout << "--------------------------- " << numActors << " actors ----------------------------------" << endl;
+    shown = (maxEntries > 0 && maxEntries < numActors) ? maxEntries : numActors;
+    for (int i = 0; i < shown; ++i)
+        actors[i]->Dump();
+    const int numFanart = fanarts.size();
+    cout << "--------------------------- " << numFanart << " fanarts ----------------------------------" << endl;
+    shown = (maxEntries > 0 && maxEntries < numFanart) ? maxEntries : numFanart;
+    for (int i = 0; i < shown; ++i)
+        fanarts[i]->Dump();
+    const int numPoster = posters.size();
+    cout << "--------------------------- " << numPoster << " posters ----------------------------------" << endl;
+    shown = (maxEntries > 0 && maxEntries < numPoster) ? maxEntries : numPoster;
+    for (int i = 0; i < shown; ++i)
+        posters[i]->Dump();
+    const int numSeasonPoster = seasonPosters.size();
+    cout << "--------------------------- " << numSeasonPoster << " season posters ---------------------------" << endl;
+    shown = (maxEntries > 0 && maxEntries < numSeasonPoster) ? maxEntries : numSeasonPoster;
+    for (int i = 0; i < shown; ++i)
+        seasonPosters[i]->Dump();
+    const int numBanner = banners.size();
+    cout << "--------------------------- " << numBanner << " banners ----------------------------------" << endl;
+    shown = (maxEntries > 0 && maxEntries < numBanner) ? maxEntries : numBanner;
+    for (int i = 0; i < shown; ++i)
+        banners[i]->Dump();
+}
diff --git a/scraper/thetvdbscraper/tvdbseries.h b/scraper/thetvdbscraper/tvdbseries.h
new file mode 100644
index 0000000..bf7a84d
--- /dev/null
+++ b/scraper/thetvdbscraper/tvdbseries.h
@@ -0,0 +1,69 @@
+#include <iostream>
+#include <string>
+#include <sstream>
+#include <vector>
+#include <libxml/parser.h>
+#include <libxml/tree.h>
+#include "tvdbmirrors.h"
+#include "tvdbactor.h"
+#include "tvdbmedia.h"
+#include "tvdbepisode.h"
+using namespace std;
+// --- cTVDBSeries -------------------------------------------------------------
+class cTVDBSeries {
+ string apiKey;
+ cTVDBMirrors *mirrors;
+ string language;
+ vector<cTVDBEpisode*> episodes;
+ vector<cTVDBActor*> actors;
+ vector<cTVDBFanart*> fanarts;
+ vector<cTVDBPoster*> posters;
+ vector<cTVDBSeasonPoster*> seasonPosters;
+ vector<cTVDBBanner*> banners;
+ int currentEpisode;
+ int currentActor;
+ int currentFanart;
+ int currentPoster;
+ int currentSeasonPoster;
+ int currentBanner;
+ void ParseXML(string xml);
+ void ReadSeriesData(xmlDoc *doc, xmlNode *node);
+ cTVDBSeries(int ID, string language, string apiKey, cTVDBMirrors *mirrors);
+ virtual ~cTVDBSeries(void);
+ bool ReadSeries(void);
+ void ReadMedia(void);
+ void ReadActors(void);
+ cTVDBEpisode *GetEpisode(void);
+ cTVDBActor *GetActor(void);
+ cTVDBFanart *GetFanart(void);
+ cTVDBPoster *GetPoster(void);
+ cTVDBSeasonPoster *GetSeasonPoster(void);
+ cTVDBSeasonPoster *GetSeasonPoster(int season);
+ cTVDBBanner *GetBanner(void);
+ bool GetPartAndSeason(int episodeId, int &season, int &part);
+ int seriesID;
+ int lastUpdated;
+ string name;
+ string banner;
+ string fanart;
+ string poster;
+ string overview;
+ string firstAired;
+ string network;
+ string imbdid;
+ string genre;
+ float rating;
+ string status;
+ void Dump(int maxEntries = 0);