diff options
author | horchi <vdr@jwendel.de> | 2017-03-05 16:39:28 +0100 |
---|---|---|
committer | horchi <vdr@jwendel.de> | 2017-03-05 16:39:28 +0100 |
commit | e2a48d8701f91b8e24fbe9e99e91eb72a87bb749 (patch) | |
tree | 726f70554b4ca985a09ef6e30a7fdc8df089993c /scraper | |
download | vdr-epg-daemon-e2a48d8701f91b8e24fbe9e99e91eb72a87bb749.tar.gz vdr-epg-daemon-e2a48d8701f91b8e24fbe9e99e91eb72a87bb749.tar.bz2 |
git init1.1.103
Diffstat (limited to 'scraper')
-rw-r--r-- | scraper/themoviedbscraper/moviedbactor.c | 49 | ||||
-rw-r--r-- | scraper/themoviedbscraper/moviedbactor.h | 44 | ||||
-rw-r--r-- | scraper/themoviedbscraper/moviedbmovie.c | 255 | ||||
-rw-r--r-- | scraper/themoviedbscraper/moviedbmovie.h | 68 | ||||
-rw-r--r-- | scraper/themoviedbscraper/themoviedbscraper.c | 103 | ||||
-rw-r--r-- | scraper/themoviedbscraper/themoviedbscraper.h | 31 | ||||
-rw-r--r-- | scraper/thetvdbscraper/thetvdbscraper.c | 188 | ||||
-rw-r--r-- | scraper/thetvdbscraper/thetvdbscraper.h | 39 | ||||
-rw-r--r-- | scraper/thetvdbscraper/tvdbactor.c | 66 | ||||
-rw-r--r-- | scraper/thetvdbscraper/tvdbactor.h | 54 | ||||
-rw-r--r-- | scraper/thetvdbscraper/tvdbepisode.c | 147 | ||||
-rw-r--r-- | scraper/thetvdbscraper/tvdbepisode.h | 45 | ||||
-rw-r--r-- | scraper/thetvdbscraper/tvdbmedia.c | 202 | ||||
-rw-r--r-- | scraper/thetvdbscraper/tvdbmedia.h | 111 | ||||
-rw-r--r-- | scraper/thetvdbscraper/tvdbmirrors.c | 88 | ||||
-rw-r--r-- | scraper/thetvdbscraper/tvdbmirrors.h | 32 | ||||
-rw-r--r-- | scraper/thetvdbscraper/tvdbseries.c | 281 | ||||
-rw-r--r-- | scraper/thetvdbscraper/tvdbseries.h | 69 |
18 files changed, 1872 insertions, 0 deletions
diff --git a/scraper/themoviedbscraper/moviedbactor.c b/scraper/themoviedbscraper/moviedbactor.c new file mode 100644 index 0000000..4e81af2 --- /dev/null +++ b/scraper/themoviedbscraper/moviedbactor.c @@ -0,0 +1,49 @@ + +#include <string> +#include <sstream> +#include <vector> +#include <iostream> +#include <jansson.h> +#include "moviedbactor.h" + +#include "../../tools/stringhelpers.h" + +using namespace std; + +cMovieDbActors::cMovieDbActors(string json, string actorsBaseUrl) { + this->json = json; + this->actorsBaseUrl = actorsBaseUrl; +} + +cMovieDbActors::~cMovieDbActors() { +} + +void cMovieDbActors::ParseJSON(vector<cMovieDBActor*> *actors) { + cJsonLoader jActors(json.c_str()); + if (!jActors.isObject()) { + return; + } + json_t *cast = jActors.objectByName("cast"); + if(!json_is_array(cast)) { + return; + } + size_t numActors = json_array_size(cast); + for (size_t i = 0; i < numActors; i++) { + json_t *jActor = json_array_get(cast, i); + if (!json_is_object(jActor)) { + return; + } + json_t *jId = json_object_get(jActor, "id"); + json_t *jName = json_object_get(jActor, "name"); + json_t *jRole = json_object_get(jActor, "character"); + json_t *jPath = json_object_get(jActor, "profile_path"); + if (!json_is_integer(jId) || !json_is_string(jName) || !json_is_string(jRole) || !json_is_string(jPath)) + return; + cMovieDBActor *actor = new cMovieDBActor(); + actor->id = json_integer_value(jId); + actor->name = json_string_value(jName); + actor->role = json_string_value(jRole); + actor->thumbUrl = actorsBaseUrl + json_string_value(jPath); + actors->push_back(actor); + } +} diff --git a/scraper/themoviedbscraper/moviedbactor.h b/scraper/themoviedbscraper/moviedbactor.h new file mode 100644 index 0000000..1b51682 --- /dev/null +++ b/scraper/themoviedbscraper/moviedbactor.h @@ -0,0 +1,44 @@ +#ifndef __TVSCRAPER_MOVIEDBACTORS_H +#define __TVSCRAPER_MOVIEDBACTORS_H + +using namespace std; + +// --- cMovieDBActor 
// Plain value holder for one cast member of a movie.
// A fresh instance has id 0, empty strings and a 370x556 thumbnail size.
class cMovieDBActor {
public:
    cMovieDBActor(void)
        : id(0),
          thumbUrl(),
          width(370),
          height(556),
          name(),
          role() {
    }

    int id;                // actor id
    std::string thumbUrl;  // URL of the actor thumbnail
    int width;             // thumbnail width
    int height;            // thumbnail height
    std::string name;      // actor name
    std::string role;      // role played by the actor

    // Print id, name, role and thumbnail URL to stdout, one per line.
    void Dump(void) {
        std::cout << "id: " << id << std::endl;
        std::cout << "name: " << name << std::endl;
        std::cout << "role: " << role << std::endl;
        std::cout << "thumbnail: " << thumbUrl << std::endl;
    }
};
actors.end(); it++) { + delete *it; + } + actors.clear(); +} + +void cMovieDbMovie::ParseJSON(void) { + cJsonLoader jMovie(json.c_str()); + if (!jMovie.isObject()) { + return; + } + json_t *jTitle = jMovie.objectByName("title"); + if(json_is_string(jTitle)) { + title = json_string_value(jTitle);; + } + json_t *jOriginalTitle = jMovie.objectByName("original_title"); + if(json_is_string(jOriginalTitle)) { + originalTitle = json_string_value(jOriginalTitle); + } + json_t *jOverview = jMovie.objectByName("overview"); + if(json_is_string(jOverview)) { + overview = json_string_value(jOverview); + } + json_t *jBackdrop = jMovie.objectByName("backdrop_path"); + if(json_is_string(jBackdrop)) { + backdropPath = backdropBaseUrl + json_string_value(jBackdrop); + } + json_t *jPoster = jMovie.objectByName("poster_path"); + if(json_is_string(jPoster)) { + posterPath = posterBaseUrl + json_string_value(jPoster); + } + json_t *jAdult = jMovie.objectByName("adult"); + if(json_is_true(jAdult)) { + adult = true; + } + json_t *collection = jMovie.objectByName("belongs_to_collection"); + if(json_is_object(collection)) { + json_t *colID = json_object_get(collection, "id"); + if (json_is_integer(colID)) { + collectionID = (int)json_integer_value(colID); + } + json_t *colName = json_object_get(collection, "name"); + if(json_is_string(jPoster)) { + collectionName = json_string_value(colName); + } + json_t *colPoster = json_object_get(collection, "poster_path"); + if(json_is_string(colPoster)) { + collectionPosterPath = posterBaseUrl + json_string_value(colPoster); + } + json_t *colBackdrop = json_object_get(collection, "backdrop_path"); + if(json_is_string(colBackdrop)) { + collectionBackdropPath = backdropBaseUrl + json_string_value(colBackdrop); + } + } + json_t *jBudget = jMovie.objectByName("budget"); + if (json_is_integer(jBudget)) { + budget = (int)json_integer_value(jBudget); + } + json_t *aGenres = jMovie.objectByName("genres"); + if(json_is_array(aGenres)) { + size_t numGenres = 
json_array_size(aGenres); + for (size_t res = 0; res < numGenres; res++) { + json_t *result = json_array_get(aGenres, res); + if (json_is_object(result)) { + json_t *jGenre = json_object_get(result, "name"); + if(json_is_string(jGenre)) { + genres += json_string_value(jGenre); + if ((res+1) < numGenres) + genres += " | "; + } + } + } + } + json_t *jHomepage = jMovie.objectByName("homepage"); + if(json_is_string(jHomepage)) { + homepage = json_string_value(jHomepage); + } + json_t *jIMDB = jMovie.objectByName("imdb_id"); + if(json_is_string(jIMDB)) { + imdbid = json_string_value(jIMDB); + } + json_t *jPopularity = jMovie.objectByName("popularity"); + if(json_is_real(jPopularity)) { + popularity = json_real_value(jPopularity); + } + json_t *jReleaseDate = jMovie.objectByName("release_date"); + if(json_is_string(jReleaseDate)) { + releaseDate = json_string_value(jReleaseDate); + } + json_t *jRevenue = jMovie.objectByName("revenue"); + if (json_is_integer(jRevenue)) { + revenue = (int)json_integer_value(jRevenue); + } + json_t *jRuntime = jMovie.objectByName("runtime"); + if (json_is_integer(jRuntime)) { + runtime = (int)json_integer_value(jRuntime); + } + json_t *jVote = jMovie.objectByName("vote_average"); + if(json_is_real(jVote)) { + voteAverage = json_real_value(jVote); + } + json_t *jTagline = jMovie.objectByName("tagline"); + if(json_is_string(jTagline)) { + tagline = json_string_value(jTagline); + } +} + +int cMovieDbMovie::ParseJSONForMovieId(string movieSearchString) { + //convert searchstring to lower case + transform(movieSearchString.begin(), movieSearchString.end(), movieSearchString.begin(), ::tolower); + cJsonLoader root(json.c_str()); + if (!root.isObject()) { + return -1; + } + json_t *results = root.objectByName("results"); + if(!json_is_array(results)) { + return -1; + } + size_t numResults = json_array_size(results); + for (size_t res = 0; res < numResults; res++) { + json_t *result = json_array_get(results, res); + if (!json_is_object(result)) { + 
return -1; + } + json_t *title = json_object_get(result, "title"); + if (!json_is_string(title)) { + return -1; + } + string resultTitle = json_string_value(title); + //convert result to lower case + transform(resultTitle.begin(), resultTitle.end(), resultTitle.begin(), ::tolower); + json_t *jId = json_object_get(result, "id"); + if (json_is_integer(jId)) { + int id = (int)json_integer_value(jId); + searchResult sRes; + sRes.id = id; + sRes.distance = sentence_distance(resultTitle, movieSearchString); + resultSet.push_back(sRes); + } + } + return FindBestResult(); +} + +int cMovieDbMovie::FindBestResult(void) { + int resID = -1; + int bestMatch = -1; + int numResults = resultSet.size(); + for (int i=0; i<numResults; i++) { + if (i == 0) { + bestMatch = resultSet[i].distance; + resID = resultSet[i].id; + } else if (resultSet[i].distance < bestMatch) { + bestMatch = resultSet[i].distance; + resID = resultSet[i].id; + } + } + return resID; +} + +void cMovieDbMovie::ReadActors(void) { + stringstream url; + url << baseUrl << "/movie/" << id << "/casts?api_key=" << apiKey; + string actorsJSON; + if (curl.GetUrl(url.str().c_str(), &actorsJSON)) { + cMovieDbActors act(actorsJSON, actorBaseUrl); + act.ParseJSON(&actors); + } +} + +cMovieDBActor *cMovieDbMovie::GetActor(void) { + int numActors = actors.size(); + if ((numActors < 1) || (currentActor >= numActors)) + return NULL; + cMovieDBActor *act = actors[currentActor]; + currentActor++; + return act; +} + +void cMovieDbMovie::Dump(void) { + cout << "-------------- MOVIE DUMP ---------------" << endl; + cout << "Title: " << title << endl; + cout << "Original Title: " << originalTitle << endl; + cout << "Tagline: " << tagline << endl; + cout << "Overview: " << overview << endl; + cout << "BackdropPath: " << backdropPath << endl; + cout << "PosterPath: " << posterPath << endl; + cout << "Adult: " << (adult?"true":"false") << endl; + if (!collectionID) { + cout << "Movie belongs not to a collection" << endl; + } else { + cout 
<< "CollectionID: " << collectionID << endl; + cout << "Collection Name: " << collectionName << endl; + cout << "Collection BackdropPath: " << collectionBackdropPath << endl; + cout << "Collection PosterPath: " << collectionPosterPath << endl; + } + cout << "Budget: " << budget << "$" << endl; + cout << "Revenue: " << revenue << "$" << endl; + cout << "Genres: " << genres << endl; + cout << "Homepage: " << homepage << endl; + cout << "imdbID: " << imdbid << endl; + cout << "Release Date: " << releaseDate << endl; + cout << "Runtime: " << runtime << " min" << endl; + cout << "Popularity: " << popularity << endl; + cout << "Vote Average: " << voteAverage << endl; + int numActors = actors.size(); + cout << "--------------------- " << numActors << " actors found -----------------" << endl; + for (int i=0; i<numActors; i++) { + actors[i]->Dump(); + } +} diff --git a/scraper/themoviedbscraper/moviedbmovie.h b/scraper/themoviedbscraper/moviedbmovie.h new file mode 100644 index 0000000..9e9c2fd --- /dev/null +++ b/scraper/themoviedbscraper/moviedbmovie.h @@ -0,0 +1,68 @@ +#ifndef __TVSCRAPER_MOVIEDBMOVIE_H +#define __TVSCRAPER_MOVIEDBMOVIE_H + +#include "moviedbactor.h" + +using namespace std; + +struct searchResult { + int id; + int distance; +}; + +// --- cMovieDbMovie ------------------------------------------------------------- + +class cMovieDbMovie { +private: + string json; + vector<searchResult> resultSet; + string apiKey; + string baseUrl; + string posterBaseUrl; + string backdropBaseUrl; + string actorBaseUrl; + vector<cMovieDBActor*> actors; + int currentActor; + int FindBestResult(void); +public: + cMovieDbMovie(string json); + virtual ~cMovieDbMovie(void); + int id; + string title; + string originalTitle; + string tagline; + string overview; + string backdropPath; + int backdropWidth; + int backdropHeight; + string posterPath; + int posterWidth; + int posterHeight; + bool adult; + int collectionID; + string collectionName; + string collectionPosterPath; + 
string collectionBackdropPath; + int budget; + int revenue; + string genres; + string homepage; + string imdbid; + string releaseDate; + int runtime; + float popularity; + float voteAverage; + int ParseJSONForMovieId(string movieSearchString); + void ParseJSON(void); + void SetApiKey(string apiKey) { this->apiKey = apiKey; }; + void SetBaseUrl(string baseUrl) { this->baseUrl = baseUrl; }; + void SetPosterBaseUrl(string url) { posterBaseUrl = url; }; + void SetBackdropBaseUrl(string url) { backdropBaseUrl = url; }; + void SetActorBaseUrl(string url) { actorBaseUrl = url; }; + void ReadActors(void); + cMovieDBActor *GetActor(void); + void Dump(); +}; + + +#endif //__TVSCRAPER_TVDBSERIES_H diff --git a/scraper/themoviedbscraper/themoviedbscraper.c b/scraper/themoviedbscraper/themoviedbscraper.c new file mode 100644 index 0000000..62263a9 --- /dev/null +++ b/scraper/themoviedbscraper/themoviedbscraper.c @@ -0,0 +1,103 @@ + +#include <string> +#include <sstream> +#include <vector> +#include <map> +#include <algorithm> +#include <iostream> +#include <jansson.h> + +#include "../../lib/curl.h" +#include "../../tools/stringhelpers.h" +#include "themoviedbscraper.h" + +using namespace std; + +cMovieDBScraper::cMovieDBScraper(string language) { + apiKey = "abb01b5a277b9c2c60ec0302d83c5ee9"; + this->language = language; + baseURL = "api.themoviedb.org/3"; + posterSize = "w500"; + backdropSize = "w1280"; + actorthumbSize = "h632"; +} + +cMovieDBScraper::~cMovieDBScraper() { +} + +cMovieDbMovie *cMovieDBScraper::Scrap(string movieName, string year) { + int movieID = SearchMovie(movieName, year); + if (movieID < 1) { + return NULL; + } + cMovieDbMovie *movie = ReadMovie(movieID); + if (!movie) + return NULL; + return movie; +} + +bool cMovieDBScraper::Connect(void) { + stringstream url; + url << baseURL << "/configuration?api_key=" << apiKey; + string configJSON; + if (curl.GetUrl(url.str().c_str(), &configJSON)) { + return parseJSON(configJSON); + } + return false; +} + +bool 
cMovieDBScraper::parseJSON(string jsonString) { + cJsonLoader root(jsonString.c_str()); + if (!root.isObject()) { + return false; + } + json_t *images = root.objectByName("images"); + if(!json_is_object(images)) { + return false; + } + + json_t *imgUrl; + imgUrl = json_object_get(images, "base_url"); + if(!json_is_string(imgUrl)) { + return false; + } + imageUrl = json_string_value(imgUrl); + return true; +} + +int cMovieDBScraper::SearchMovie(string movieName, string year) { + stringstream url; + string movieJSON; + int movieID = -1; + char* escUrl = curl.EscapeUrl(movieName.c_str()); + + url << baseURL << "/search/movie?api_key=" << apiKey << "&query=" << escUrl << "&language=" << language.c_str(); + if (year.size() > 0) + url << "&year=" << year.c_str(); + curl.Free(escUrl); + + if (curl.GetUrl(url.str().c_str(), &movieJSON)) { + cMovieDbMovie movie(movieJSON); + movieID = movie.ParseJSONForMovieId(movieName); + } + + return movieID; +} + +cMovieDbMovie *cMovieDBScraper::ReadMovie(int movieID) { + stringstream url; + url << baseURL << "/movie/" << movieID << "?api_key=" << apiKey << "&language=" << language.c_str(); + string movieJSON; + cMovieDbMovie *movie = NULL; + if (curl.GetUrl(url.str().c_str(), &movieJSON)) { + movie = new cMovieDbMovie(movieJSON); + movie->id = movieID; + movie->SetBaseUrl(baseURL); + movie->SetApiKey(apiKey); + movie->SetPosterBaseUrl(imageUrl + posterSize); + movie->SetBackdropBaseUrl(imageUrl + backdropSize); + movie->SetActorBaseUrl(imageUrl + actorthumbSize); + movie->ParseJSON(); + } + return movie; +} diff --git a/scraper/themoviedbscraper/themoviedbscraper.h b/scraper/themoviedbscraper/themoviedbscraper.h new file mode 100644 index 0000000..d2f449e --- /dev/null +++ b/scraper/themoviedbscraper/themoviedbscraper.h @@ -0,0 +1,31 @@ +#ifndef __TVSCRAPER_MOVIEDBSCRAPER_H +#define __TVSCRAPER_MOVIEDBSCRAPER_H + +#include "moviedbmovie.h" +#include "moviedbactor.h" + +using namespace std; + +// --- cMovieDBScraper 
------------------------------------------------------------- + +class cMovieDBScraper { +private: + string apiKey; + string language; + string baseURL; + string imageUrl; + string posterSize; + string backdropSize; + string actorthumbSize; + bool parseJSON(string jsonString); + int SearchMovie(string movieName, string year); +public: + cMovieDBScraper(string language); + virtual ~cMovieDBScraper(void); + bool Connect(void); + cMovieDbMovie *Scrap(string movieName, string year=""); + cMovieDbMovie *ReadMovie(int movieID); +}; + + +#endif //__TVSCRAPER_MOVIEDBSCRAPER_H diff --git a/scraper/thetvdbscraper/thetvdbscraper.c b/scraper/thetvdbscraper/thetvdbscraper.c new file mode 100644 index 0000000..e83690c --- /dev/null +++ b/scraper/thetvdbscraper/thetvdbscraper.c @@ -0,0 +1,188 @@ + +#include "../../lib/curl.h" +#include "thetvdbscraper.h" + +using namespace std; + +cTVDBScraper::cTVDBScraper(string language) { + apiKey = "E9DBB94CA50832ED"; + baseURL = "thetvdb.com"; + this->language = language; + mirrors = NULL; + xmlInitParser(); +} + +cTVDBScraper::~cTVDBScraper() { + if (mirrors) + delete mirrors; +} + +cTVDBSeries *cTVDBScraper::ScrapInitial(string seriesName) { + cTVDBSeries *series = NULL; + int seriesID = ReadSeries(seriesName); + if (seriesID) { + series = new cTVDBSeries(seriesID, language, apiKey, mirrors); + } + return series; +} + +cTVDBSeries *cTVDBScraper::GetSeries(int seriesID) { + return new cTVDBSeries(seriesID, language, apiKey, mirrors); +} + +cTVDBEpisode *cTVDBScraper::GetEpisode(int episodeID) { + return new cTVDBEpisode(episodeID, language, apiKey, mirrors); +} + +bool cTVDBScraper::Connect(void) { + stringstream url; + url << baseURL << "/api/" << apiKey << "/mirrors.xml"; + string mirrorsXML; + bool ok = false; + if (curl.GetUrl(url.str().c_str(), &mirrorsXML)) { + mirrors = new cTVDBMirrors(); + ok = mirrors->ParseXML(mirrorsXML); + } + return ok; +} + +int cTVDBScraper::GetServerTime(void) { + string url = 
"http://thetvdb.com/api/Updates.php?type=none"; + string serverTimeXML; + if (!curl.GetUrl(url.c_str(), &serverTimeXML)) { + return 0; + } + xmlDoc *doc = SetXMLDoc(serverTimeXML); + if (doc == NULL) + return 0; + //Root Element has to be <Items> + xmlNode *node = NULL; + node = xmlDocGetRootElement(doc); + if (!(node && !xmlStrcmp(node->name, (const xmlChar *)"Items"))) { + xmlFreeDoc(doc); + return 0; + } + node = node->children; + xmlNode *cur_node = NULL; + for (cur_node = node; cur_node; cur_node = cur_node->next) { + if ((cur_node->type == XML_ELEMENT_NODE) && !xmlStrcmp(cur_node->name, (const xmlChar *)"Time")) { + node = cur_node; + break; + } else { + node = NULL; + } + } + if (!node) { + xmlFreeDoc(doc); + return 0; + } + xmlChar *node_content = xmlNodeListGetString(doc, cur_node->xmlChildrenNode, 1); + int serverTime = atoi((const char *)node_content); + xmlFree(node_content); + xmlFreeDoc(doc); + return serverTime; +} + +bool cTVDBScraper::GetUpdatedSeriesandEpisodes(set<int> *updatedSeries, set<int> *updatedEpisodes, int lastScrap) { + stringstream url; + url << "http://thetvdb.com/api/Updates.php?type=all&time=" << lastScrap; + string updatedXML; + if (!curl.GetUrl(url.str().c_str(), &updatedXML)) { + return false; + } + xmlDoc *doc = SetXMLDoc(updatedXML); + if (doc == NULL) + return false; + //Root Element has to be <Items> + xmlNode *node = NULL; + node = xmlDocGetRootElement(doc); + if (!(node && !xmlStrcmp(node->name, (const xmlChar *)"Items"))) { + xmlFreeDoc(doc); + return false; + } + + xmlNode *cur_node = node->children; + for (; cur_node; cur_node = cur_node->next) { + if ((cur_node->type == XML_ELEMENT_NODE) && !xmlStrcmp(cur_node->name, (const xmlChar *)"Series")) { + xmlChar *node_content = xmlNodeListGetString(doc, cur_node->xmlChildrenNode, 1); + int seriesId = atoi((const char *)node_content); + xmlFree(node_content); + updatedSeries->insert(seriesId); + } else if ((cur_node->type == XML_ELEMENT_NODE) && !xmlStrcmp(cur_node->name, 
(const xmlChar *)"Episode")) { + xmlChar *node_content = xmlNodeListGetString(doc, cur_node->xmlChildrenNode, 1); + int episodeId = atoi((const char *)node_content); + xmlFree(node_content); + updatedEpisodes->insert(episodeId); + } + } + xmlFreeDoc(doc); + return true; +} + + +int cTVDBScraper::ReadSeries(string seriesName) { + int seriesID = 0; + stringstream url; + string seriesXML; + char* escUrl = curl.EscapeUrl(seriesName.c_str()); + + url << mirrors->GetMirrorXML() << "/api/GetSeries.php?seriesname=" << escUrl << "&language=" << language.c_str(); + curl.Free(escUrl); + + if (curl.GetUrl(url.str().c_str(), &seriesXML)) + seriesID = ParseXML(seriesXML); + + return seriesID; +} + +int cTVDBScraper::ParseXML(string xml) { + int seriesID = 0; + xmlDoc *doc = SetXMLDoc(xml); + if (doc == NULL) + return seriesID; + //Root Element has to be <Data> + xmlNode *node = NULL; + node = xmlDocGetRootElement(doc); + if (!(node && !xmlStrcmp(node->name, (const xmlChar *)"Data"))) { + xmlFreeDoc(doc); + return seriesID; + } + //Searching for <Series> + node = node->children; + xmlNode *cur_node = NULL; + for (cur_node = node; cur_node; cur_node = cur_node->next) { + if ((cur_node->type == XML_ELEMENT_NODE) && !xmlStrcmp(cur_node->name, (const xmlChar *)"Series")) { + node = cur_node; + break; + } else { + node = NULL; + } + } + if (!node) { + xmlFreeDoc(doc); + return seriesID; + } + //now read the first series + node = node->children; + xmlChar *node_content; + for (cur_node = node; cur_node; cur_node = cur_node->next) { + if (cur_node->type == XML_ELEMENT_NODE) { + node_content = xmlNodeListGetString(doc, cur_node->xmlChildrenNode, 1); + if (!node_content) + continue; + if (!xmlStrcmp(cur_node->name, (const xmlChar *)"seriesid")) { + seriesID = atoi((const char *)node_content); + xmlFree(node_content); + break; + } + xmlFree(node_content); + } + } + xmlFreeDoc(doc); + return seriesID; +} + +xmlDoc *cTVDBScraper::SetXMLDoc(string xml) { + xmlDoc *doc = 
// Entry point for talking to the thetvdb.com XML API: resolves the mirror
// list, searches series by name and creates series/episode objects.
class cTVDBScraper {
private:
    string apiKey;          // API key inserted into request URLs
    string baseURL;         // host used by Connect() to fetch mirrors.xml
    string language;        // language code passed to requests and created objects
    cTVDBMirrors *mirrors;  // NULL until Connect() downloaded mirrors.xml; deleted in the destructor
    // Parse an XML string with xmlReadMemory(); callers treat NULL as failure
    // and release the document with xmlFreeDoc().
    xmlDoc *SetXMLDoc(string xml);
    // Extract the <seriesid> of the first <Series> below <Data>; 0 if none is found.
    int ParseXML(string xml);
    // Query GetSeries.php for seriesName and return the id found by ParseXML().
    int ReadSeries(string seriesName);
public:
    cTVDBScraper(string language);
    virtual ~cTVDBScraper(void);
    // Download and parse mirrors.xml; returns the result of cTVDBMirrors::ParseXML(),
    // or false when the download fails.
    bool Connect(void);
    // Value of <Time> from Updates.php?type=none; 0 on download or parse failure.
    int GetServerTime(void);
    // Search seriesName and return a newly allocated series object, or NULL
    // when the search yields id 0.
    cTVDBSeries *ScrapInitial(string seriesName);
    // Collect ids of <Series>/<Episode> elements from Updates.php?type=all&time=lastScrap
    // into the two sets; false on download or parse failure.
    bool GetUpdatedSeriesandEpisodes(set<int> *updatedSeries, set<int> *updatedEpisodes, int lastScrap);
    // Return a newly allocated series object for the given id.
    cTVDBSeries *GetSeries(int seriesID);
    // Return a newly allocated episode object for the given id.
    cTVDBEpisode *GetEpisode(int episodeID);
};
0); + if (doc == NULL) + return; + //Root Element has to be <Actors> + xmlNode *node = NULL; + node = xmlDocGetRootElement(doc); + if (!(node && !xmlStrcmp(node->name, (const xmlChar *)"Actors"))) { + xmlFreeDoc(doc); + return; + } + //Looping through actors + node = node->children; + xmlNode *cur_node = NULL; + for (cur_node = node; cur_node; cur_node = cur_node->next) { + if ((cur_node->type == XML_ELEMENT_NODE) && !xmlStrcmp(cur_node->name, (const xmlChar *)"Actor")) { + cTVDBActor *actor = ReadEntry(doc, cur_node->children); + actors->push_back(actor); + } + } + xmlFreeDoc(doc); +} + +cTVDBActor *cTVDBActors::ReadEntry(xmlDoc *doc, xmlNode *node) { + xmlNode *cur_node = NULL; + xmlChar *node_content; + cTVDBActor *actor = new cTVDBActor(); + for (cur_node = node; cur_node; cur_node = cur_node->next) { + if (cur_node->type == XML_ELEMENT_NODE) { + node_content = xmlNodeListGetString(doc, cur_node->xmlChildrenNode, 1); + if (!node_content) + continue; + if (!xmlStrcmp(cur_node->name, (const xmlChar *)"Image")) { + actor->thumbUrl = mirrors->GetMirrorBanner() + (const char *)node_content; + } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"Name")) { + actor->name = (const char *)node_content; + } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"Role")) { + actor->role = (const char *)node_content; + } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"id")) { + actor->id = atoi((const char *)node_content); + } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"SortOrder")) { + actor->sortOrder = atoi((const char *)node_content); + } + xmlFree(node_content); + } + } + return actor; +}
// Plain value holder for one actor of a series.
// A fresh instance has empty strings, id and sortOrder 0 and a 300x450
// thumbnail size.
class cTVDBActor {
public:
    cTVDBActor(void)
        : thumbUrl(),
          thumbUrlWidth(300),
          thumbUrlHeight(450),
          name(),
          role(),
          id(0),
          sortOrder(0) {
    }

    std::string thumbUrl;  // URL of the actor thumbnail
    int thumbUrlWidth;     // thumbnail width
    int thumbUrlHeight;    // thumbnail height
    std::string name;      // actor name
    std::string role;      // role played by the actor
    int id;                // actor id
    int sortOrder;         // value of the SortOrder element

    // Print name, id, role, thumbnail URL and sort order to stdout.
    void Dump() {
        std::cout << "Actor name: " << name << ", ID: " << id << std::endl;
        std::cout << "Actor role: " << role << std::endl;
        std::cout << "Actor thumb: " << thumbUrl << std::endl;
        std::cout << "Actor SortOrder: " << sortOrder << std::endl;
    }
};
0; + combinedEpisode = 0; + combinedSeason = 0; + name = ""; + firstAired = ""; + guestStars = ""; + overview = ""; + rating = 0.0; + imageUrl = ""; + width = 400; + height = 225; + imgFlag = 0; + seasonId = 0; + lastUpdated = 0; +} + +cTVDBEpisode::cTVDBEpisode(int ID, string language, string apiKey, cTVDBMirrors *mirrors) { + this->language = language; + this->apiKey = apiKey; + this->mirrors = mirrors; + id = ID; + seriesID = 0; + number = 0; + season = 0; + combinedEpisode = 0; + combinedSeason = 0; + name = ""; + firstAired = ""; + guestStars = ""; + overview = ""; + rating = 0.0; + imageUrl = ""; + width = 400; + height = 225; + imgFlag = 0; + seasonId = 0; + lastUpdated = 0; +} + +void cTVDBEpisode::ReadEpisode(void) { + stringstream url; + url << mirrors->GetMirrorXML() << "/api/" << apiKey << "/episodes/" << id << "/" << language << ".xml"; + string episodeXML; + if (curl.GetUrl(url.str().c_str(), &episodeXML)) { + ParseXML(episodeXML); + } +} + +void cTVDBEpisode::ParseXML(string xml) { + xmlDoc *doc = xmlReadMemory(xml.c_str(), xml.size(), "noname.xml", NULL, 0); + if (doc == NULL) + return; + //Root Element has to be <Data> + xmlNode *node = NULL; + node = xmlDocGetRootElement(doc); + if (!(node && !xmlStrcmp(node->name, (const xmlChar *)"Data"))) { + xmlFreeDoc(doc); + return; + } + //Looping through episodes + node = node->children; + xmlNode *cur_node = NULL; + for (cur_node = node; cur_node; cur_node = cur_node->next) { + if ((cur_node->type == XML_ELEMENT_NODE) && !xmlStrcmp(cur_node->name, (const xmlChar *)"Episode")) { + ReadEpisodeFromXML(doc, cur_node->children, mirrors); + } + } + xmlFreeDoc(doc); +} + +void cTVDBEpisode::ReadEpisodeFromXML(xmlDoc *myDoc, xmlNode *node, cTVDBMirrors *mirrors) { + xmlNode *cur_node = NULL; + xmlChar *node_content; + for (cur_node = node; cur_node; cur_node = cur_node->next) { + if (cur_node->type == XML_ELEMENT_NODE) { + node_content = xmlNodeListGetString(myDoc, cur_node->xmlChildrenNode, 1); + if 
(!node_content) + continue; + if (!xmlStrcmp(cur_node->name, (const xmlChar *)"id")) { + id = atoi((const char *)node_content); + } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"EpisodeNumber")) { + number = atoi((const char *)node_content); + } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"seriesid")) { + seriesID = atoi((const char *)node_content); + } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"SeasonNumber")) { + season = atoi((const char *)node_content); + } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"Combined_episodenumber")) { + combinedEpisode = atoi((const char *)node_content); + } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"Combined_season")) { + combinedSeason = atoi((const char *)node_content); + } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"EpisodeName")) { + name = (const char *)node_content; + } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"FirstAired")) { + firstAired = (const char *)node_content; + } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"GuestStars")) { + guestStars = (const char *)node_content; + } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"Overview")) { + overview = (const char *)node_content; + } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"Rating")) { + rating = atof((const char *)node_content); + } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"filename")) { + imageUrl = mirrors->GetMirrorBanner() + (const char *)node_content; + } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"thumb_width")) { + width = atoi((const char *)node_content); + } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"thumb_height")) { + height = atoi((const char *)node_content); + } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"EpImgFlag")) { + imgFlag = atoi((const char *)node_content); + } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"seasonid")) { + seasonId = atoi((const char *)node_content); + } else if (!xmlStrcmp(cur_node->name, 
(const xmlChar *)"lastupdated")) { + lastUpdated = atoi((const char *)node_content); + } + xmlFree(node_content); + } + } +} + +void cTVDBEpisode::Dump() { + cout << "----------------------------------------" << endl; + cout << "Season " << season << ", Episode " << number << ", Name: " << name << ", ID: " << id << ", SeasonID " << seasonId << endl; + cout << "combinedSeason: " << combinedSeason << ", combinedEpisode: " << combinedEpisode << endl; + cout << "First Aired: " << firstAired << endl; + cout << "Guest Stars: " << guestStars << endl; + cout << "Overview: " << overview << endl; + cout << "Rating: " << rating << endl; + cout << "imageUrl: " << imageUrl << ", Size: " << width << " x " << height << ", Flag: " << imgFlag << endl; + cout << "Last Update: " << lastUpdated << endl; +} diff --git a/scraper/thetvdbscraper/tvdbepisode.h b/scraper/thetvdbscraper/tvdbepisode.h new file mode 100644 index 0000000..945278b --- /dev/null +++ b/scraper/thetvdbscraper/tvdbepisode.h @@ -0,0 +1,45 @@ +#ifndef __TVSCRAPER_TVDBEPISODE_H +#define __TVSCRAPER_TVDBEPISODE_H + +#include <iostream> +#include <string> +#include <sstream> +#include <vector> +#include <libxml/parser.h> +#include <libxml/tree.h> +#include "tvdbmirrors.h" + +using namespace std; + +// --- cTVDBEpisode ------------------------------------------------------------- +class cTVDBEpisode { +private: + string apiKey; + cTVDBMirrors *mirrors; + string language; + void ParseXML(string xml); +public: + cTVDBEpisode(void); + cTVDBEpisode(int ID, string language, string apiKey, cTVDBMirrors *mirrors); + int id; + int seriesID; + int number; + int season; + int combinedSeason; + int combinedEpisode; + string name; + string firstAired; + string guestStars; + string overview; + float rating; + string imageUrl; + int width; + int height; + int imgFlag; + int seasonId; + int lastUpdated; + void ReadEpisode(void); + void ReadEpisodeFromXML(xmlDoc *myDoc, xmlNode *node, cTVDBMirrors *mirrors); + void Dump(); +}; +#endif 
//cTVDBEpisode diff --git a/scraper/thetvdbscraper/tvdbmedia.c b/scraper/thetvdbscraper/tvdbmedia.c new file mode 100644 index 0000000..7495f3b --- /dev/null +++ b/scraper/thetvdbscraper/tvdbmedia.c @@ -0,0 +1,202 @@ +#include "tvdbmedia.h" + +using namespace std; + +cTVDBSeriesMedia::cTVDBSeriesMedia(string language, cTVDBMirrors *mirrors) { + this->language = language; + fallbackLanguage = "en"; + this->mirrors = mirrors; +} + +cTVDBSeriesMedia::~cTVDBSeriesMedia() { +} + +void cTVDBSeriesMedia::ParseXML(string xml, vector<cTVDBBanner*> *banners, vector<cTVDBFanart*> *fanarts, vector<cTVDBPoster*> *posters, vector<cTVDBSeasonPoster*> *seasonPosters) { + xmlDoc *doc = xmlReadMemory(xml.c_str(), xml.size(), "noname.xml", NULL, 0); + this->banners = banners; + this->fanarts = fanarts; + this->posters = posters; + this->seasonPosters = seasonPosters; + if (doc == NULL) + return; + //Root Element has to be <Banners> + xmlNode *node = NULL; + node = xmlDocGetRootElement(doc); + if (!(node && !xmlStrcmp(node->name, (const xmlChar *)"Banners"))) { + xmlFreeDoc(doc); + return; + } + //Looping through banners + node = node->children; + xmlNode *cur_node = NULL; + for (cur_node = node; cur_node; cur_node = cur_node->next) { + if ((cur_node->type == XML_ELEMENT_NODE) && !xmlStrcmp(cur_node->name, (const xmlChar *)"Banner")) { + ReadEntry(doc, cur_node->children); + } + } + xmlFreeDoc(doc); +} + +void cTVDBSeriesMedia::ReadEntry(xmlDoc *doc, xmlNode *node) { + xmlNode *cur_node = NULL; + xmlChar *node_content; + for (cur_node = node; cur_node; cur_node = cur_node->next) { + if (cur_node->type == XML_ELEMENT_NODE) { + node_content = xmlNodeListGetString(doc, cur_node->xmlChildrenNode, 1); + if (!node_content) + continue; + if (!xmlStrcmp(cur_node->name, (const xmlChar *)"BannerType")) { + + if (!xmlStrcmp(node_content, (const xmlChar *)"poster")) + ReadPoster(doc, node); + else if (!xmlStrcmp(node_content, (const xmlChar *)"fanart")) + ReadFanart(doc, node); + else if 
(!xmlStrcmp(node_content, (const xmlChar *)"series")) + ReadBanner(doc, node); + else if (!xmlStrcmp(node_content, (const xmlChar *)"season")) + ReadSeasonPoster(doc, node); + + } + xmlFree(node_content); + } + } +} + +void cTVDBSeriesMedia::ReadFanart(xmlDoc *doc, xmlNode *node) { + xmlNode *cur_node = NULL; + xmlChar *node_content; + cTVDBFanart *fanart = new cTVDBFanart(); + //default size + fanart->width = 1920; + fanart->height = 1080; + for (cur_node = node; cur_node; cur_node = cur_node->next) { + if (cur_node->type == XML_ELEMENT_NODE) { + node_content = xmlNodeListGetString(doc, cur_node->xmlChildrenNode, 1); + if (!node_content) + continue; + if (!xmlStrcmp(cur_node->name, (const xmlChar *)"BannerPath")) { + fanart->url = mirrors->GetMirrorBanner() + (const char *)node_content; + } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"Language")) { + fanart->language = (const char *)node_content; + } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"Rating")) { + fanart->rating = atof((const char *)node_content); + } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"ThumbnailPath")) { + fanart->thumbUrl = mirrors->GetMirrorBanner() + (const char *)node_content; + } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"BannerType2")) { + string fanartSize = (const char *)node_content; + size_t posX = fanartSize.find("x"); + fanart->width = atoi(fanartSize.substr(0, posX).c_str()); + fanart->height = atoi(fanartSize.substr(posX+1).c_str()); + } + xmlFree(node_content); + } + } + if ( (fanart->url.size() == 0) || + ((fanart->language.compare(language)) && (fanart->language.compare(fallbackLanguage)))) { + delete fanart; + return; + } + fanarts->push_back(fanart); +} + +void cTVDBSeriesMedia::ReadPoster(xmlDoc *doc, xmlNode *node) { + xmlNode *cur_node = NULL; + xmlChar *node_content; + cTVDBPoster *poster = new cTVDBPoster(); + //default size + poster->width = 400; + poster->height = 578; + for (cur_node = node; cur_node; cur_node = cur_node->next) { 
+ if (cur_node->type == XML_ELEMENT_NODE) { + node_content = xmlNodeListGetString(doc, cur_node->xmlChildrenNode, 1); + if (!node_content) + continue; + if (!xmlStrcmp(cur_node->name, (const xmlChar *)"BannerPath")) { + poster->url = mirrors->GetMirrorBanner() + (const char *)node_content; + } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"Language")) { + poster->language = (const char *)node_content; + } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"Rating")) { + poster->rating = atof((const char *)node_content); + } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"BannerType2")) { + string posterSize = (const char *)node_content; + size_t posX = posterSize.find("x"); + poster->width = atoi(posterSize.substr(0, posX).c_str()); + poster->height = atoi(posterSize.substr(posX+1).c_str()); + } + xmlFree(node_content); + } + } + if ( (poster->url.size() == 0) || + ((poster->language.compare(language)) && (poster->language.compare(fallbackLanguage)))) { + delete poster; + return; + } + posters->push_back(poster); +} + +void cTVDBSeriesMedia::ReadSeasonPoster(xmlDoc *doc, xmlNode *node) { + xmlNode *cur_node = NULL; + xmlChar *node_content; + cTVDBSeasonPoster *poster = new cTVDBSeasonPoster(); + poster->width = 400; + poster->height = 578; + bool add = true; + for (cur_node = node; cur_node; cur_node = cur_node->next) { + if (cur_node->type == XML_ELEMENT_NODE) { + node_content = xmlNodeListGetString(doc, cur_node->xmlChildrenNode, 1); + if (!node_content) + continue; + if (!xmlStrcmp(cur_node->name, (const xmlChar *)"BannerPath")) { + poster->url = mirrors->GetMirrorBanner() + (const char *)node_content; + } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"Language")) { + poster->language = (const char *)node_content; + } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"Rating")) { + poster->rating = atof((const char *)node_content); + } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"Season")) { + poster->season = atoi((const char 
*)node_content); + } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"BannerType2")) { + string bt2 = (const char *)node_content; + if (!bt2.compare("seasonwide")) + add = false; + } + xmlFree(node_content); + } + } + if ( (poster->url.size() == 0) || + (poster->season == 0) || + (!add) || + ((poster->language.compare(language)) && (poster->language.compare(fallbackLanguage)))) { + delete poster; + return; + } + seasonPosters->push_back(poster); +} + +void cTVDBSeriesMedia::ReadBanner(xmlDoc *doc, xmlNode *node) { + xmlNode *cur_node = NULL; + xmlChar *node_content; + cTVDBBanner *banner = new cTVDBBanner(); + banner->width = 758; + banner->height = 140; + for (cur_node = node; cur_node; cur_node = cur_node->next) { + if (cur_node->type == XML_ELEMENT_NODE) { + node_content = xmlNodeListGetString(doc, cur_node->xmlChildrenNode, 1); + if (!node_content) + continue; + if (!xmlStrcmp(cur_node->name, (const xmlChar *)"BannerPath")) { + banner->url = mirrors->GetMirrorBanner() + (const char *)node_content; + } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"Language")) { + banner->language = (const char *)node_content; + } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"Rating")) { + banner->rating = atof((const char *)node_content); + } + xmlFree(node_content); + } + } + if ( (banner->url.size() == 0) || + ((banner->language.compare(language)) && (banner->language.compare(fallbackLanguage)))) { + delete banner; + return; + } + banners->push_back(banner); +} diff --git a/scraper/thetvdbscraper/tvdbmedia.h b/scraper/thetvdbscraper/tvdbmedia.h new file mode 100644 index 0000000..4277c39 --- /dev/null +++ b/scraper/thetvdbscraper/tvdbmedia.h @@ -0,0 +1,111 @@ +#ifndef __TVSCRAPER_TVDBMEDIA_H +#define __TVSCRAPER_TVDBMEDIA_H + +#include <iostream> +#include <string> +#include <sstream> +#include <vector> +#include <libxml/parser.h> +#include <libxml/tree.h> +#include "tvdbmirrors.h" + +using namespace std; + +// --- cTVDBMedia 
------------------------------------------------------------- +class cTVDBMedia { +public: + cTVDBMedia(void) { + url = ""; + language = ""; + width = 0; + height = 0; + rating = 0.0; + }; + string url; + string language; + int width, height; + double rating; +}; + +// --- cTVDBFanart ------------------------------------------------------------- +class cTVDBFanart : public cTVDBMedia { +public: + cTVDBFanart(void) { + thumbUrl = ""; + }; + string thumbUrl; + void Dump() { + cout << "Url: " << url << endl; + cout << "Url Thumbnail: " << thumbUrl << endl; + cout << "Language: " << language << endl; + cout << "Size: " << width << " x " << height << endl; + cout << "Rating: " << rating << endl; + }; +}; + +// --- cTVDBPoster ------------------------------------------------------------- +class cTVDBPoster : public cTVDBMedia { +public: + cTVDBPoster(void) { + }; + void Dump() { + cout << "Url: " << url << endl; + cout << "Language: " << language << endl; + cout << "Size: " << width << " x " << height << endl; + cout << "Rating: " << rating << endl; + }; +}; + +// --- cTVDBSeasonPoster ------------------------------------------------------------- +class cTVDBSeasonPoster : public cTVDBMedia { +public: + cTVDBSeasonPoster(void) { + season = 0; + }; + int season; + void Dump() { + cout << "Url: " << url << endl; + cout << "Season: " << season << endl; + cout << "Language: " << language << endl; + cout << "Size: " << width << " x " << height << endl; + cout << "Rating: " << rating << endl; + }; +}; + +// --- cTVDBBanner ------------------------------------------------------------- +class cTVDBBanner : public cTVDBMedia { +public: + cTVDBBanner(void) { + }; + int season; + void Dump() { + cout << "Url: " << url << endl; + cout << "Language: " << language << endl; + cout << "Size: " << width << " x " << height << endl; + cout << "Rating: " << rating << endl; + }; +}; + +// --- cTVDBSeriesMedia -------------------------------------------------------- + +class cTVDBSeriesMedia 
{ +private: + string language; + string fallbackLanguage; + cTVDBMirrors *mirrors; + vector<cTVDBBanner*> *banners; + vector<cTVDBFanart*> *fanarts; + vector<cTVDBPoster*> *posters; + vector<cTVDBSeasonPoster*> *seasonPosters; + void ReadEntry(xmlDoc *doc, xmlNode *node); + void ReadFanart(xmlDoc *doc, xmlNode *node); + void ReadPoster(xmlDoc *doc, xmlNode *node); + void ReadBanner(xmlDoc *doc, xmlNode *node); + void ReadSeasonPoster(xmlDoc *doc, xmlNode *node); +public: + cTVDBSeriesMedia(string language, cTVDBMirrors *mirrors); + virtual ~cTVDBSeriesMedia(void); + void ParseXML(string xml, vector<cTVDBBanner*> *banners, vector<cTVDBFanart*> *fanarts, vector<cTVDBPoster*> *posters, vector<cTVDBSeasonPoster*> *seasonPosters); +}; + +#endif //__TVSCRAPER_TVDBMEDIA_H diff --git a/scraper/thetvdbscraper/tvdbmirrors.c b/scraper/thetvdbscraper/tvdbmirrors.c new file mode 100644 index 0000000..2c063ec --- /dev/null +++ b/scraper/thetvdbscraper/tvdbmirrors.c @@ -0,0 +1,88 @@ +#include "tvdbmirrors.h" + +using namespace std; + +cTVDBMirrors::cTVDBMirrors(void) { +} + +cTVDBMirrors::~cTVDBMirrors() { +} + +string cTVDBMirrors::GetMirrorXML(void) { + if (xmlmirrors.size() == 0) + return ""; + int randMirror = rand() % xmlmirrors.size(); + return xmlmirrors[randMirror]; +} +string cTVDBMirrors::GetMirrorBanner(void) { + if (bannermirrors.size() == 0) + return ""; + int randMirror = rand() % bannermirrors.size(); + return bannermirrors[randMirror] + "/banners/"; +} + +string cTVDBMirrors::GetMirrorZip(void) { + if (zipmirrors.size() == 0) + return ""; + int randMirror = rand() % zipmirrors.size(); + return zipmirrors[randMirror]; +} + +bool cTVDBMirrors::ParseXML(string xml) { + xmlDoc *doc = xmlReadMemory(xml.c_str(), xml.size(), "noname.xml", NULL, 0); + if (doc == NULL) + return false; + //Root Element has to be <Mirrors> + xmlNode *node = NULL; + node = xmlDocGetRootElement(doc); + if (!(node && !xmlStrcmp(node->name, (const xmlChar *)"Mirrors"))) { + xmlFreeDoc(doc); + 
return false; + } + //Loop through <Mirror> + node = node->children; + xmlNode *cur_node = NULL; + bool ok = false; + for (cur_node = node; cur_node; cur_node = cur_node->next) { + if ((cur_node->type == XML_ELEMENT_NODE) && !xmlStrcmp(cur_node->name, (const xmlChar *)"Mirror")) { + ok = ReadEntry(doc, cur_node->children); + } + } + xmlFreeDoc(doc); + return ok; +} + +bool cTVDBMirrors::ReadEntry(xmlDoc *doc, xmlNode *node) { + xmlNode *cur_node = NULL; + xmlChar *node_content; + string path = ""; + int typemask = 0; + for (cur_node = node; cur_node; cur_node = cur_node->next) { + if (cur_node->type == XML_ELEMENT_NODE) { + node_content = xmlNodeListGetString(doc, cur_node->xmlChildrenNode, 1); + if (!node_content) + continue; + if (!xmlStrcmp(cur_node->name, (const xmlChar *)"mirrorpath")) { + path = (const char *)node_content; + } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"typemask")) { + typemask = atoi((const char *)node_content); + } + xmlFree(node_content); + } + } + return CreateMirror(path, typemask); +} + +bool cTVDBMirrors::CreateMirror(string path, int typemask) { + if (path.size() < 1) + return false; + if (typemask < 1 || typemask > 7) + return false; + if (typemask & 1) + xmlmirrors.push_back(path); + if (typemask & 2) + bannermirrors.push_back(path); + if (typemask & 4) + zipmirrors.push_back(path); + return true; +}
\ No newline at end of file diff --git a/scraper/thetvdbscraper/tvdbmirrors.h b/scraper/thetvdbscraper/tvdbmirrors.h new file mode 100644 index 0000000..cf7dc62 --- /dev/null +++ b/scraper/thetvdbscraper/tvdbmirrors.h @@ -0,0 +1,32 @@ +#ifndef __TVSCRAPER_TVDBMIRRORS_H +#define __TVSCRAPER_TVDBMIRRORS_H + +#include <iostream> +#include <string> +#include <sstream> +#include <vector> +#include <libxml/parser.h> +#include <libxml/tree.h> + +using namespace std; + +// --- cTVDBMirrors ------------------------------------------------------------- + +class cTVDBMirrors { +private: + vector<string> xmlmirrors; + vector<string> bannermirrors; + vector<string> zipmirrors; + bool ReadEntry(xmlDoc *doc, xmlNode *node); + bool CreateMirror(string path, int typemask); +public: + cTVDBMirrors(void); + virtual ~cTVDBMirrors(void); + bool ParseXML(string xml); + string GetMirrorXML(void); + string GetMirrorBanner(void); + string GetMirrorZip(void); +}; + + +#endif //__TVSCRAPER_TVDBMIRRORS_H diff --git a/scraper/thetvdbscraper/tvdbseries.c b/scraper/thetvdbscraper/tvdbseries.c new file mode 100644 index 0000000..21dc78b --- /dev/null +++ b/scraper/thetvdbscraper/tvdbseries.c @@ -0,0 +1,281 @@ + +#include "../../lib/curl.h" +#include "tvdbseries.h" + +using namespace std; + +cTVDBSeries::cTVDBSeries(int ID, string language, string apiKey, cTVDBMirrors *mirrors) { + this->language = language; + this->apiKey = apiKey; + this->mirrors = mirrors; + seriesID = ID; + name = ""; + banner = ""; + fanart = ""; + poster = ""; + overview = ""; + firstAired = ""; + network = ""; + imbdid = ""; + genre = ""; + rating = 0.0; + status = ""; + currentEpisode = 0; + currentActor = 0; + currentFanart = 0; + currentPoster = 0; + currentSeasonPoster = 0; + currentBanner = 0; +} + +cTVDBSeries::~cTVDBSeries() { + for(std::vector<cTVDBEpisode*>::const_iterator it = episodes.begin(); it != episodes.end(); it++) { + delete *it; + } + episodes.clear(); + for(std::vector<cTVDBActor*>::const_iterator it = 
actors.begin(); it != actors.end(); it++) { + delete *it; + } + actors.clear(); + for(std::vector<cTVDBFanart*>::const_iterator it = fanarts.begin(); it != fanarts.end(); it++) { + delete *it; + } + fanarts.clear(); + for(std::vector<cTVDBPoster*>::const_iterator it = posters.begin(); it != posters.end(); it++) { + delete *it; + } + posters.clear(); + for(std::vector<cTVDBSeasonPoster*>::const_iterator it = seasonPosters.begin(); it != seasonPosters.end(); it++) { + delete *it; + } + seasonPosters.clear(); + for(std::vector<cTVDBBanner*>::const_iterator it = banners.begin(); it != banners.end(); it++) { + delete *it; + } + banners.clear(); +} + +bool cTVDBSeries::ReadSeries(void) { + stringstream url; + url << mirrors->GetMirrorXML() << "/api/" << apiKey << "/series/" << seriesID << "/all/" << language << ".xml"; + string seriesXML; + if (curl.GetUrl(url.str().c_str(), &seriesXML)) { + ParseXML(seriesXML); + return true; + } + return false; +} + +void cTVDBSeries::ReadMedia(void) { + stringstream url; + url << mirrors->GetMirrorXML() << "/api/" << apiKey << "/series/" << seriesID << "/banners.xml"; + string bannersXML; + if (curl.GetUrl(url.str().c_str(), &bannersXML)) { + cTVDBSeriesMedia med(language, mirrors); + med.ParseXML(bannersXML, &banners, &fanarts, &posters, &seasonPosters); + } +} + +void cTVDBSeries::ReadActors(void) { + stringstream url; + url << mirrors->GetMirrorXML() << "/api/" << apiKey << "/series/" << seriesID << "/actors.xml"; + string actorsXML; + if (curl.GetUrl(url.str().c_str(), &actorsXML)) { + cTVDBActors act(language, mirrors); + act.ParseXML(actorsXML, &actors); + } +} + +void cTVDBSeries::ParseXML(string xml) { + xmlDoc *doc = xmlReadMemory(xml.c_str(), xml.size(), "noname.xml", NULL, 0); + if (doc == NULL) + return; + //Root Element has to be <Data> + xmlNode *node = NULL; + node = xmlDocGetRootElement(doc); + if (!(node && !xmlStrcmp(node->name, (const xmlChar *)"Data"))) { + xmlFreeDoc(doc); + return; + } + //Looping through 
episodes + node = node->children; + xmlNode *cur_node = NULL; + for (cur_node = node; cur_node; cur_node = cur_node->next) { + if ((cur_node->type == XML_ELEMENT_NODE) && !xmlStrcmp(cur_node->name, (const xmlChar *)"Series")) { + ReadSeriesData(doc, cur_node->children); + } else if ((cur_node->type == XML_ELEMENT_NODE) && !xmlStrcmp(cur_node->name, (const xmlChar *)"Episode")) { + cTVDBEpisode *episode = new cTVDBEpisode(); + episode->ReadEpisodeFromXML(doc, cur_node->children, mirrors); + episodes.push_back(episode); + } + } + xmlFreeDoc(doc); +} + +void cTVDBSeries::ReadSeriesData(xmlDoc *doc, xmlNode *node) { + xmlNode *cur_node = NULL; + xmlChar *node_content; + for (cur_node = node; cur_node; cur_node = cur_node->next) { + if (cur_node->type == XML_ELEMENT_NODE) { + node_content = xmlNodeListGetString(doc, cur_node->xmlChildrenNode, 1); + if (!node_content) + continue; + if (!xmlStrcmp(cur_node->name, (const xmlChar *)"FirstAired")) { + firstAired = (const char *)node_content; + } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"Genre")) { + genre = (const char *)node_content; + } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"IMDB_ID")) { + imbdid = (const char *)node_content; + } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"Network")) { + network = (const char *)node_content; + } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"Overview")) { + overview = (const char *)node_content; + } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"Rating")) { + rating = atof((const char *)node_content); + } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"SeriesName")) { + name = (const char *)node_content; + } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"Status")) { + status = (const char *)node_content; + } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"banner")) { + banner = mirrors->GetMirrorBanner() + (const char *)node_content; + } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"fanart")) { + fanart = 
mirrors->GetMirrorBanner() + (const char *)node_content; + } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"poster")) { + poster = mirrors->GetMirrorBanner() + (const char *)node_content; + } else if (!xmlStrcmp(cur_node->name, (const xmlChar *)"lastupdated")) { + lastUpdated = atoi((const char *)node_content); + } + xmlFree(node_content); + } + } +} + +cTVDBEpisode *cTVDBSeries::GetEpisode(void) { + int numEpisodes = episodes.size(); + if ((numEpisodes < 1) || (currentEpisode >= numEpisodes)) + return NULL; + cTVDBEpisode *epi = episodes[currentEpisode]; + currentEpisode++; + return epi; +} + +cTVDBActor *cTVDBSeries::GetActor(void) { + int numActors = actors.size(); + if ((numActors < 1) || (currentActor >= numActors)) + return NULL; + cTVDBActor *act = actors[currentActor]; + currentActor++; + return act; +} + +cTVDBFanart *cTVDBSeries::GetFanart(void) { + int numFanarts = fanarts.size(); + if ((numFanarts < 1) || (currentFanart >= numFanarts)) + return NULL; + cTVDBFanart *fan = fanarts[currentFanart]; + currentFanart++; + return fan; +} + +cTVDBPoster *cTVDBSeries::GetPoster(void) { + int numPosters = posters.size(); + if ((numPosters < 1) || (currentPoster >= numPosters)) + return NULL; + cTVDBPoster *pos = posters[currentPoster]; + currentPoster++; + return pos; +} + +cTVDBSeasonPoster *cTVDBSeries::GetSeasonPoster(void) { + int numSeasonPosters = seasonPosters.size(); + if ((numSeasonPosters < 1) || (currentSeasonPoster >= numSeasonPosters)) + return NULL; + cTVDBSeasonPoster *pos = seasonPosters[currentSeasonPoster]; + currentSeasonPoster++; + return pos; +} + +cTVDBSeasonPoster *cTVDBSeries::GetSeasonPoster(int season) { + int numSeasonPoster = seasonPosters.size(); + for (int i=0; i<numSeasonPoster; i++) { + if (seasonPosters[i]->season == season) + return seasonPosters[i]; + } + return NULL; +} + +cTVDBBanner *cTVDBSeries::GetBanner(void) { + int numBanners = banners.size(); + if ((numBanners < 1) || (currentBanner >= numBanners)) + return NULL; 
+ cTVDBBanner *ban = banners[currentBanner]; + currentBanner++; + return ban; +} + +bool cTVDBSeries::GetPartAndSeason(int episodeId, int &season, int &part) { + for (vector<cTVDBEpisode*>::iterator ep = episodes.begin(); ep != episodes.end(); ep++) { + cTVDBEpisode *episode = *ep; + if(episode->id == episodeId) { + season = episode->season; + part = episode->number; + return true; + } + } + return false; +} + +void cTVDBSeries::Dump(int maxEntries) { + cout << "--------------------------- Series Info ----------------------------------" << endl; + cout << "series " << name << ", ID: " << seriesID <<endl; + cout << "Overview: " << overview << endl; + cout << "Banner: " << banner << endl; + cout << "Poster: " << poster << endl; + cout << "Fanart: " << fanart << endl; + cout << "imdb: " << imbdid << endl; + cout << "FirstAired: " << firstAired << endl; + cout << "Network: " << network << endl; + cout << "Status: " << status << endl; + cout << "lastUpdated: " << lastUpdated << endl; + cout << "Genre: " << genre << endl; + cout << "Rating: " << rating << endl; + + int size = episodes.size(); + cout << "--------------------------- " << size << " episodes ----------------------------------" << endl; + for (int i=0; i<size; i++) { + if (maxEntries && (i == maxEntries)) break; + episodes[i]->Dump(); + } + + size = actors.size(); + cout << "--------------------------- " << size << " actors ----------------------------------" << endl; + for (int i=0; i<size; i++) { + if (maxEntries && (i == maxEntries)) break; + actors[i]->Dump(); + } + + int numFanart = fanarts.size(); + cout << "--------------------------- " << numFanart << " fanarts ----------------------------------" << endl; + for (int i=0; i<numFanart; i++) { + if (maxEntries && (i == maxEntries)) break; + fanarts[i]->Dump(); + } + int numPoster = posters.size(); + cout << "--------------------------- " << numPoster << " posters ----------------------------------" << endl; + for (int i=0; i<numPoster; i++) { + if 
(maxEntries && (i == maxEntries)) break; + posters[i]->Dump(); + } + int numSeasonPoster = seasonPosters.size(); + cout << "--------------------------- " << numSeasonPoster << " season posters ---------------------------" << endl; + for (int i=0; i<numSeasonPoster; i++) { + if (maxEntries && (i == maxEntries)) break; + seasonPosters[i]->Dump(); + } + int numBanner = banners.size(); + cout << "--------------------------- " << numBanner << " banners ----------------------------------" << endl; + for (int i=0; i<numBanner; i++) { + if (maxEntries && (i == maxEntries)) break; + banners[i]->Dump(); + } +} diff --git a/scraper/thetvdbscraper/tvdbseries.h b/scraper/thetvdbscraper/tvdbseries.h new file mode 100644 index 0000000..bf7a84d --- /dev/null +++ b/scraper/thetvdbscraper/tvdbseries.h @@ -0,0 +1,69 @@ +#ifndef __TVSCRAPER_TVDBSERIES_H +#define __TVSCRAPER_TVDBSERIES_H + +#include <iostream> +#include <string> +#include <sstream> +#include <vector> +#include <libxml/parser.h> +#include <libxml/tree.h> +#include "tvdbmirrors.h" +#include "tvdbactor.h" +#include "tvdbmedia.h" +#include "tvdbepisode.h" + +using namespace std; + +// --- cTVDBSeries ------------------------------------------------------------- + +class cTVDBSeries { +private: + string apiKey; + cTVDBMirrors *mirrors; + string language; + vector<cTVDBEpisode*> episodes; + vector<cTVDBActor*> actors; + vector<cTVDBFanart*> fanarts; + vector<cTVDBPoster*> posters; + vector<cTVDBSeasonPoster*> seasonPosters; + vector<cTVDBBanner*> banners; + int currentEpisode; + int currentActor; + int currentFanart; + int currentPoster; + int currentSeasonPoster; + int currentBanner; + void ParseXML(string xml); + void ReadSeriesData(xmlDoc *doc, xmlNode *node); +public: + cTVDBSeries(int ID, string language, string apiKey, cTVDBMirrors *mirrors); + virtual ~cTVDBSeries(void); + bool ReadSeries(void); + void ReadMedia(void); + void ReadActors(void); + cTVDBEpisode *GetEpisode(void); + cTVDBActor *GetActor(void); + 
cTVDBFanart *GetFanart(void); + cTVDBPoster *GetPoster(void); + cTVDBSeasonPoster *GetSeasonPoster(void); + cTVDBSeasonPoster *GetSeasonPoster(int season); + cTVDBBanner *GetBanner(void); + bool GetPartAndSeason(int episodeId, int &season, int &part); + int seriesID; + int lastUpdated; + string name; + string banner; + string fanart; + string poster; + string overview; + string firstAired; + string network; + string imbdid; + string genre; + float rating; + string status; + void Dump(int maxEntries = 0); +}; + + +#endif //__TVSCRAPER_TVDBSERIES_H |