diff options
author | horchi <vdr@jwendel.de> | 2017-03-05 16:39:28 +0100 |
---|---|---|
committer | horchi <vdr@jwendel.de> | 2017-03-05 16:39:28 +0100 |
commit | e2a48d8701f91b8e24fbe9e99e91eb72a87bb749 (patch) | |
tree | 726f70554b4ca985a09ef6e30a7fdc8df089993c /scraper/themoviedbscraper | |
download | vdr-epg-daemon-e2a48d8701f91b8e24fbe9e99e91eb72a87bb749.tar.gz vdr-epg-daemon-e2a48d8701f91b8e24fbe9e99e91eb72a87bb749.tar.bz2 |
git init1.1.103
Diffstat (limited to 'scraper/themoviedbscraper')
-rw-r--r-- | scraper/themoviedbscraper/moviedbactor.c | 49 | ||||
-rw-r--r-- | scraper/themoviedbscraper/moviedbactor.h | 44 | ||||
-rw-r--r-- | scraper/themoviedbscraper/moviedbmovie.c | 255 | ||||
-rw-r--r-- | scraper/themoviedbscraper/moviedbmovie.h | 68 | ||||
-rw-r--r-- | scraper/themoviedbscraper/themoviedbscraper.c | 103 | ||||
-rw-r--r-- | scraper/themoviedbscraper/themoviedbscraper.h | 31 |
6 files changed, 550 insertions, 0 deletions
diff --git a/scraper/themoviedbscraper/moviedbactor.c b/scraper/themoviedbscraper/moviedbactor.c new file mode 100644 index 0000000..4e81af2 --- /dev/null +++ b/scraper/themoviedbscraper/moviedbactor.c @@ -0,0 +1,49 @@ + +#include <string> +#include <sstream> +#include <vector> +#include <iostream> +#include <jansson.h> +#include "moviedbactor.h" + +#include "../../tools/stringhelpers.h" + +using namespace std; + +cMovieDbActors::cMovieDbActors(string json, string actorsBaseUrl) { + this->json = json; + this->actorsBaseUrl = actorsBaseUrl; +} + +cMovieDbActors::~cMovieDbActors() { +} + +void cMovieDbActors::ParseJSON(vector<cMovieDBActor*> *actors) { + cJsonLoader jActors(json.c_str()); + if (!jActors.isObject()) { + return; + } + json_t *cast = jActors.objectByName("cast"); + if(!json_is_array(cast)) { + return; + } + size_t numActors = json_array_size(cast); + for (size_t i = 0; i < numActors; i++) { + json_t *jActor = json_array_get(cast, i); + if (!json_is_object(jActor)) { + return; + } + json_t *jId = json_object_get(jActor, "id"); + json_t *jName = json_object_get(jActor, "name"); + json_t *jRole = json_object_get(jActor, "character"); + json_t *jPath = json_object_get(jActor, "profile_path"); + if (!json_is_integer(jId) || !json_is_string(jName) || !json_is_string(jRole) || !json_is_string(jPath)) + return; + cMovieDBActor *actor = new cMovieDBActor(); + actor->id = json_integer_value(jId); + actor->name = json_string_value(jName); + actor->role = json_string_value(jRole); + actor->thumbUrl = actorsBaseUrl + json_string_value(jPath); + actors->push_back(actor); + } +} diff --git a/scraper/themoviedbscraper/moviedbactor.h b/scraper/themoviedbscraper/moviedbactor.h new file mode 100644 index 0000000..1b51682 --- /dev/null +++ b/scraper/themoviedbscraper/moviedbactor.h @@ -0,0 +1,44 @@ +#ifndef __TVSCRAPER_MOVIEDBACTORS_H +#define __TVSCRAPER_MOVIEDBACTORS_H + +using namespace std; + +// --- cMovieDBActor ------------------------------------------------------------- +class cMovieDBActor { +public: + cMovieDBActor(void) { + id = 0; + thumbUrl = ""; + name = ""; + role = ""; + width = 370; + height = 556; + }; + int id; + string thumbUrl; + int width; + int height; + string name; + string role; + void Dump(void) { + cout << "id: " << id << endl; + cout << "name: " << name << endl; + cout << "role: " << role << endl; + cout << "thumbnail: " << thumbUrl << endl; + }; +}; + +// --- cMovieDBActors ------------------------------------------------------------- + +class cMovieDbActors { +private: + string json; + string actorsBaseUrl; +public: + cMovieDbActors(string json, string actorsBaseUrl); + virtual ~cMovieDbActors(void); + void ParseJSON(vector<cMovieDBActor*> *actors); +}; + + +#endif //__TVSCRAPER_MOVIEDBACTORS_H diff --git a/scraper/themoviedbscraper/moviedbmovie.c b/scraper/themoviedbscraper/moviedbmovie.c new file mode 100644 index 0000000..8060930 --- /dev/null +++ b/scraper/themoviedbscraper/moviedbmovie.c @@ -0,0 +1,255 @@ + +#include <string> +#include <sstream> +#include <vector> +#include <algorithm> +#include <iostream> +#include <jansson.h> + +#include "../../lib/curl.h" +#include "../../tools/fuzzy.h" +#include "../../tools/stringhelpers.h" +#include "moviedbmovie.h" + +using namespace std; + +cMovieDbMovie::cMovieDbMovie(string json) { + this->json = json; + title = ""; + originalTitle = ""; + tagline = ""; + overview = ""; + backdropPath = ""; + posterPath = ""; + adult = false; + collectionID = 0; + collectionName = ""; + collectionPosterPath = ""; + collectionBackdropPath = ""; + budget = 0; + revenue = 0; + genres = ""; + homepage = ""; + imdbid = ""; + releaseDate = ""; + runtime = 0; + popularity = 0.0; + voteAverage = 0.0; + currentActor = 0; + backdropWidth = 1280; + backdropHeight = 720; + posterWidth = 500; + posterHeight = 750; +} + +cMovieDbMovie::~cMovieDbMovie() { + for(std::vector<cMovieDBActor*>::const_iterator it = actors.begin(); it != actors.end(); it++) { + delete *it; + } + actors.clear(); +} + +void cMovieDbMovie::ParseJSON(void) { + cJsonLoader jMovie(json.c_str()); + if (!jMovie.isObject()) { + return; + } + json_t *jTitle = jMovie.objectByName("title"); + if(json_is_string(jTitle)) { + title = json_string_value(jTitle);; + } + json_t *jOriginalTitle = jMovie.objectByName("original_title"); + if(json_is_string(jOriginalTitle)) { + originalTitle = json_string_value(jOriginalTitle); + } + json_t *jOverview = jMovie.objectByName("overview"); + if(json_is_string(jOverview)) { + overview = json_string_value(jOverview); + } + json_t *jBackdrop = jMovie.objectByName("backdrop_path"); + if(json_is_string(jBackdrop)) { + backdropPath = backdropBaseUrl + json_string_value(jBackdrop); + } + json_t *jPoster = jMovie.objectByName("poster_path"); + if(json_is_string(jPoster)) { + posterPath = posterBaseUrl + json_string_value(jPoster); + } + json_t *jAdult = jMovie.objectByName("adult"); + if(json_is_true(jAdult)) { + adult = true; + } + json_t *collection = jMovie.objectByName("belongs_to_collection"); + if(json_is_object(collection)) { + json_t *colID = json_object_get(collection, "id"); + if (json_is_integer(colID)) { + collectionID = (int)json_integer_value(colID); + } + json_t *colName = json_object_get(collection, "name"); + if(json_is_string(jPoster)) { + collectionName = json_string_value(colName); + } + json_t *colPoster = json_object_get(collection, "poster_path"); + if(json_is_string(colPoster)) { + collectionPosterPath = posterBaseUrl + json_string_value(colPoster); + } + json_t *colBackdrop = json_object_get(collection, "backdrop_path"); + if(json_is_string(colBackdrop)) { + collectionBackdropPath = backdropBaseUrl + json_string_value(colBackdrop); + } + } + json_t *jBudget = jMovie.objectByName("budget"); + if (json_is_integer(jBudget)) { + budget = (int)json_integer_value(jBudget); + } + json_t *aGenres = jMovie.objectByName("genres"); + if(json_is_array(aGenres)) { + size_t numGenres = json_array_size(aGenres); + for (size_t res = 0; res < numGenres; res++) { + json_t *result = json_array_get(aGenres, res); + if (json_is_object(result)) { + json_t *jGenre = json_object_get(result, "name"); + if(json_is_string(jGenre)) { + genres += json_string_value(jGenre); + if ((res+1) < numGenres) + genres += " | "; + } + } + } + } + json_t *jHomepage = jMovie.objectByName("homepage"); + if(json_is_string(jHomepage)) { + homepage = json_string_value(jHomepage); + } + json_t *jIMDB = jMovie.objectByName("imdb_id"); + if(json_is_string(jIMDB)) { + imdbid = json_string_value(jIMDB); + } + json_t *jPopularity = jMovie.objectByName("popularity"); + if(json_is_real(jPopularity)) { + popularity = json_real_value(jPopularity); + } + json_t *jReleaseDate = jMovie.objectByName("release_date"); + if(json_is_string(jReleaseDate)) { + releaseDate = json_string_value(jReleaseDate); + } + json_t *jRevenue = jMovie.objectByName("revenue"); + if (json_is_integer(jRevenue)) { + revenue = (int)json_integer_value(jRevenue); + } + json_t *jRuntime = jMovie.objectByName("runtime"); + if (json_is_integer(jRuntime)) { + runtime = (int)json_integer_value(jRuntime); + } + json_t *jVote = jMovie.objectByName("vote_average"); + if(json_is_real(jVote)) { + voteAverage = json_real_value(jVote); + } + json_t *jTagline = jMovie.objectByName("tagline"); + if(json_is_string(jTagline)) { + tagline = json_string_value(jTagline); + } +} + +int cMovieDbMovie::ParseJSONForMovieId(string movieSearchString) { + //convert searchstring to lower case + transform(movieSearchString.begin(), movieSearchString.end(), movieSearchString.begin(), ::tolower); + cJsonLoader root(json.c_str()); + if (!root.isObject()) { + return -1; + } + json_t *results = root.objectByName("results"); + if(!json_is_array(results)) { + return -1; + } + size_t numResults = json_array_size(results); + for (size_t res = 0; res < numResults; res++) { + json_t *result = json_array_get(results, res); + if (!json_is_object(result)) { + return -1; + } + json_t *title = json_object_get(result, "title"); + if (!json_is_string(title)) { + return -1; + } + string resultTitle = json_string_value(title); + //convert result to lower case + transform(resultTitle.begin(), resultTitle.end(), resultTitle.begin(), ::tolower); + json_t *jId = json_object_get(result, "id"); + if (json_is_integer(jId)) { + int id = (int)json_integer_value(jId); + searchResult sRes; + sRes.id = id; + sRes.distance = sentence_distance(resultTitle, movieSearchString); + resultSet.push_back(sRes); + } + } + return FindBestResult(); +} + +int cMovieDbMovie::FindBestResult(void) { + int resID = -1; + int bestMatch = -1; + int numResults = resultSet.size(); + for (int i=0; i<numResults; i++) { + if (i == 0) { + bestMatch = resultSet[i].distance; + resID = resultSet[i].id; + } else if (resultSet[i].distance < bestMatch) { + bestMatch = resultSet[i].distance; + resID = resultSet[i].id; + } + } + return resID; +} + +void cMovieDbMovie::ReadActors(void) { + stringstream url; + url << baseUrl << "/movie/" << id << "/casts?api_key=" << apiKey; + string actorsJSON; + if (curl.GetUrl(url.str().c_str(), &actorsJSON)) { + cMovieDbActors act(actorsJSON, actorBaseUrl); + act.ParseJSON(&actors); + } +} + +cMovieDBActor *cMovieDbMovie::GetActor(void) { + int numActors = actors.size(); + if ((numActors < 1) || (currentActor >= numActors)) + return NULL; + cMovieDBActor *act = actors[currentActor]; + currentActor++; + return act; +} + +void cMovieDbMovie::Dump(void) { + cout << "-------------- MOVIE DUMP ---------------" << endl; + cout << "Title: " << title << endl; + cout << "Original Title: " << originalTitle << endl; + cout << "Tagline: " << tagline << endl; + cout << "Overview: " << overview << endl; + cout << "BackdropPath: " << backdropPath << endl; + cout << "PosterPath: " << posterPath << endl; + cout << "Adult: " << (adult?"true":"false") << endl; + if (!collectionID) { + cout << "Movie belongs not to a collection" << endl; + } else { + cout << "CollectionID: " << collectionID << endl; + cout << "Collection Name: " << collectionName << endl; + cout << "Collection BackdropPath: " << collectionBackdropPath << endl; + cout << "Collection PosterPath: " << collectionPosterPath << endl; + } + cout << "Budget: " << budget << "$" << endl; + cout << "Revenue: " << revenue << "$" << endl; + cout << "Genres: " << genres << endl; + cout << "Homepage: " << homepage << endl; + cout << "imdbID: " << imdbid << endl; + cout << "Release Date: " << releaseDate << endl; + cout << "Runtime: " << runtime << " min" << endl; + cout << "Popularity: " << popularity << endl; + cout << "Vote Average: " << voteAverage << endl; + int numActors = actors.size(); + cout << "--------------------- " << numActors << " actors found -----------------" << endl; + for (int i=0; i<numActors; i++) { + actors[i]->Dump(); + } +} diff --git a/scraper/themoviedbscraper/moviedbmovie.h b/scraper/themoviedbscraper/moviedbmovie.h new file mode 100644 index 0000000..9e9c2fd --- /dev/null +++ b/scraper/themoviedbscraper/moviedbmovie.h @@ -0,0 +1,68 @@ +#ifndef __TVSCRAPER_MOVIEDBMOVIE_H +#define __TVSCRAPER_MOVIEDBMOVIE_H + +#include "moviedbactor.h" + +using namespace std; + +struct searchResult { + int id; + int distance; +}; + +// --- cMovieDbMovie ------------------------------------------------------------- + +class cMovieDbMovie { +private: + string json; + vector<searchResult> resultSet; + string apiKey; + string baseUrl; + string posterBaseUrl; + string backdropBaseUrl; + string actorBaseUrl; + vector<cMovieDBActor*> actors; + int currentActor; + int FindBestResult(void); +public: + cMovieDbMovie(string json); + virtual ~cMovieDbMovie(void); + int id; + string title; + string originalTitle; + string tagline; + string overview; + string backdropPath; + int backdropWidth; + int backdropHeight; + string posterPath; + int posterWidth; + int posterHeight; + bool adult; + int collectionID; + string collectionName; + string collectionPosterPath; + string collectionBackdropPath; + int budget; + int revenue; + string genres; + string homepage; + string imdbid; + string releaseDate; + int runtime; + float popularity; + float voteAverage; + int ParseJSONForMovieId(string movieSearchString); + void ParseJSON(void); + void SetApiKey(string apiKey) { this->apiKey = apiKey; }; + void SetBaseUrl(string baseUrl) { this->baseUrl = baseUrl; }; + void SetPosterBaseUrl(string url) { posterBaseUrl = url; }; + void SetBackdropBaseUrl(string url) { backdropBaseUrl = url; }; + void SetActorBaseUrl(string url) { actorBaseUrl = url; }; + void ReadActors(void); + cMovieDBActor *GetActor(void); + void Dump(); +}; + + +#endif //__TVSCRAPER_TVDBSERIES_H diff --git a/scraper/themoviedbscraper/themoviedbscraper.c b/scraper/themoviedbscraper/themoviedbscraper.c new file mode 100644 index 0000000..62263a9 --- /dev/null +++ b/scraper/themoviedbscraper/themoviedbscraper.c @@ -0,0 +1,103 @@ + +#include <string> +#include <sstream> +#include <vector> +#include <map> +#include <algorithm> +#include <iostream> +#include <jansson.h> + +#include "../../lib/curl.h" +#include "../../tools/stringhelpers.h" +#include "themoviedbscraper.h" + +using namespace std; + +cMovieDBScraper::cMovieDBScraper(string language) { + apiKey = "abb01b5a277b9c2c60ec0302d83c5ee9"; + this->language = language; + baseURL = "api.themoviedb.org/3"; + posterSize = "w500"; + backdropSize = "w1280"; + actorthumbSize = "h632"; +} + +cMovieDBScraper::~cMovieDBScraper() { +} + +cMovieDbMovie *cMovieDBScraper::Scrap(string movieName, string year) { + int movieID = SearchMovie(movieName, year); + if (movieID < 1) { + return NULL; + } + cMovieDbMovie *movie = ReadMovie(movieID); + if (!movie) + return NULL; + return movie; +} + +bool cMovieDBScraper::Connect(void) { + stringstream url; + url << baseURL << "/configuration?api_key=" << apiKey; + string configJSON; + if (curl.GetUrl(url.str().c_str(), &configJSON)) { + return parseJSON(configJSON); + } + return false; +} + +bool cMovieDBScraper::parseJSON(string jsonString) { + cJsonLoader root(jsonString.c_str()); + if (!root.isObject()) { + return false; + } + json_t *images = root.objectByName("images"); + if(!json_is_object(images)) { + return false; + } + + json_t *imgUrl; + imgUrl = json_object_get(images, "base_url"); + if(!json_is_string(imgUrl)) { + return false; + } + imageUrl = json_string_value(imgUrl); + return true; +} + +int cMovieDBScraper::SearchMovie(string movieName, string year) { + stringstream url; + string movieJSON; + int movieID = -1; + char* escUrl = curl.EscapeUrl(movieName.c_str()); + + url << baseURL << "/search/movie?api_key=" << apiKey << "&query=" << escUrl << "&language=" << language.c_str(); + if (year.size() > 0) + url << "&year=" << year.c_str(); + curl.Free(escUrl); + + if (curl.GetUrl(url.str().c_str(), &movieJSON)) { + cMovieDbMovie movie(movieJSON); + movieID = movie.ParseJSONForMovieId(movieName); + } + + return movieID; +} + +cMovieDbMovie *cMovieDBScraper::ReadMovie(int movieID) { + stringstream url; + url << baseURL << "/movie/" << movieID << "?api_key=" << apiKey << "&language=" << language.c_str(); + string movieJSON; + cMovieDbMovie *movie = NULL; + if (curl.GetUrl(url.str().c_str(), &movieJSON)) { + movie = new cMovieDbMovie(movieJSON); + movie->id = movieID; + movie->SetBaseUrl(baseURL); + movie->SetApiKey(apiKey); + movie->SetPosterBaseUrl(imageUrl + posterSize); + movie->SetBackdropBaseUrl(imageUrl + backdropSize); + movie->SetActorBaseUrl(imageUrl + actorthumbSize); + movie->ParseJSON(); + } + return movie; +} diff --git a/scraper/themoviedbscraper/themoviedbscraper.h b/scraper/themoviedbscraper/themoviedbscraper.h new file mode 100644 index 0000000..d2f449e --- /dev/null +++ b/scraper/themoviedbscraper/themoviedbscraper.h @@ -0,0 +1,31 @@ +#ifndef __TVSCRAPER_MOVIEDBSCRAPER_H +#define __TVSCRAPER_MOVIEDBSCRAPER_H + +#include "moviedbmovie.h" +#include "moviedbactor.h" + +using namespace std; + +// --- cMovieDBScraper ------------------------------------------------------------- + +class cMovieDBScraper { +private: + string apiKey; + string language; + string baseURL; + string imageUrl; + string posterSize; + string backdropSize; + string actorthumbSize; + bool parseJSON(string jsonString); + int SearchMovie(string movieName, string year); +public: + cMovieDBScraper(string language); + virtual ~cMovieDBScraper(void); + bool Connect(void); + cMovieDbMovie *Scrap(string movieName, string year=""); + cMovieDbMovie *ReadMovie(int movieID); +}; + + +#endif //__TVSCRAPER_MOVIEDBSCRAPER_H |