summaryrefslogtreecommitdiff
path: root/scraper/themoviedbscraper
diff options
context:
space:
mode:
author horchi <vdr@jwendel.de> 2017-03-05 16:39:28 +0100
committer horchi <vdr@jwendel.de> 2017-03-05 16:39:28 +0100
commit e2a48d8701f91b8e24fbe9e99e91eb72a87bb749 (patch)
tree 726f70554b4ca985a09ef6e30a7fdc8df089993c /scraper/themoviedbscraper
downloadvdr-epg-daemon-e2a48d8701f91b8e24fbe9e99e91eb72a87bb749.tar.gz
vdr-epg-daemon-e2a48d8701f91b8e24fbe9e99e91eb72a87bb749.tar.bz2
git init1.1.103
Diffstat (limited to 'scraper/themoviedbscraper')
-rw-r--r--scraper/themoviedbscraper/moviedbactor.c49
-rw-r--r--scraper/themoviedbscraper/moviedbactor.h44
-rw-r--r--scraper/themoviedbscraper/moviedbmovie.c255
-rw-r--r--scraper/themoviedbscraper/moviedbmovie.h68
-rw-r--r--scraper/themoviedbscraper/themoviedbscraper.c103
-rw-r--r--scraper/themoviedbscraper/themoviedbscraper.h31
6 files changed, 550 insertions, 0 deletions
diff --git a/scraper/themoviedbscraper/moviedbactor.c b/scraper/themoviedbscraper/moviedbactor.c
new file mode 100644
index 0000000..4e81af2
--- /dev/null
+++ b/scraper/themoviedbscraper/moviedbactor.c
@@ -0,0 +1,49 @@
+
+#include <string>
+#include <sstream>
+#include <vector>
+#include <iostream>
+#include <jansson.h>
+#include "moviedbactor.h"
+
+#include "../../tools/stringhelpers.h"
+
+using namespace std;
+
+cMovieDbActors::cMovieDbActors(string json, string actorsBaseUrl) // keeps copies of the raw JSON text and of the thumbnail URL prefix
+ : json(json), // parsed later by ParseJSON()
+ actorsBaseUrl(actorsBaseUrl) { // put in front of every actor's "profile_path"
+}
+
+cMovieDbActors::~cMovieDbActors() { // intentionally empty: no cleanup code is needed here
+}
+
+void cMovieDbActors::ParseJSON(vector<cMovieDBActor*> *actors) { // appends one new'd cMovieDBActor per usable "cast" entry to *actors; they are not freed here, the caller takes ownership
+ cJsonLoader jActors(json.c_str());
+ if (!jActors.isObject()) { // top level is not a JSON object: leave *actors untouched
+ return;
+ }
+ json_t *cast = jActors.objectByName("cast");
+ if(!json_is_array(cast)) { // no "cast" array: nothing to add
+ return;
+ }
+ size_t numActors = json_array_size(cast);
+ for (size_t i = 0; i < numActors; i++) {
+ json_t *jActor = json_array_get(cast, i);
+ if (!json_is_object(jActor)) {
+ continue; // skip only this entry; returning here used to drop every actor after it
+ }
+ json_t *jId = json_object_get(jActor, "id");
+ json_t *jName = json_object_get(jActor, "name");
+ json_t *jRole = json_object_get(jActor, "character");
+ json_t *jPath = json_object_get(jActor, "profile_path");
+ if (!json_is_integer(jId) || !json_is_string(jName) || !json_is_string(jRole) || !json_is_string(jPath))
+ continue; // a missing or non-string field (e.g. a JSON null "profile_path") disqualifies this entry only
+ cMovieDBActor *actor = new cMovieDBActor();
+ actor->id = (int)json_integer_value(jId); // explicit narrowing from json_int_t, as done in moviedbmovie.c
+ actor->name = json_string_value(jName);
+ actor->role = json_string_value(jRole);
+ actor->thumbUrl = actorsBaseUrl + json_string_value(jPath); // full thumbnail URL = prefix + relative path
+ actors->push_back(actor);
+ }
+}
diff --git a/scraper/themoviedbscraper/moviedbactor.h b/scraper/themoviedbscraper/moviedbactor.h
new file mode 100644
index 0000000..1b51682
--- /dev/null
+++ b/scraper/themoviedbscraper/moviedbactor.h
@@ -0,0 +1,44 @@
+#ifndef __TVSCRAPER_MOVIEDBACTORS_H
+#define __TVSCRAPER_MOVIEDBACTORS_H
+
+using namespace std;
+
+// --- cMovieDBActor -------------------------------------------------------------
+class cMovieDBActor { // plain record describing one cast member
+public:
+ cMovieDBActor(void) // starts out empty, with a thumbnail size of 370x556
+ : id(0),
+ thumbUrl(""),
+ width(370),
+ height(556),
+ name(""),
+ role("") {
+ };
+ int id;
+ string thumbUrl;
+ int width;
+ int height;
+ string name;
+ string role;
+ void Dump(void) { // prints id, name, role and thumbnail URL to stdout, one per line
+ cout << "id: " << id << endl
+ << "name: " << name << endl
+ << "role: " << role << endl
+ << "thumbnail: " << thumbUrl << endl;
+ };
+};
+
+// --- cMovieDBActors -------------------------------------------------------------
+
+class cMovieDbActors { // turns the "cast" array of a JSON response into cMovieDBActor objects
+private:
+ string json; // raw JSON text handed to the constructor
+ string actorsBaseUrl; // prefix that ParseJSON() puts in front of each "profile_path"
+public:
+ cMovieDbActors(string json, string actorsBaseUrl);
+ virtual ~cMovieDbActors(void);
+ void ParseJSON(vector<cMovieDBActor*> *actors); // appends newly allocated actors to *actors; this class never deletes them
+};
+
+
+#endif //__TVSCRAPER_MOVIEDBACTORS_H
diff --git a/scraper/themoviedbscraper/moviedbmovie.c b/scraper/themoviedbscraper/moviedbmovie.c
new file mode 100644
index 0000000..8060930
--- /dev/null
+++ b/scraper/themoviedbscraper/moviedbmovie.c
@@ -0,0 +1,255 @@
+
+#include <string>
+#include <sstream>
+#include <vector>
+#include <algorithm>
+#include <iostream>
+#include <jansson.h>
+
+#include "../../lib/curl.h"
+#include "../../tools/fuzzy.h"
+#include "../../tools/stringhelpers.h"
+#include "moviedbmovie.h"
+
+using namespace std;
+
+cMovieDbMovie::cMovieDbMovie(string json) { // keeps the raw JSON and puts every field into a defined default state
+ this->json = json;
+ id = 0; // previously left indeterminate until a caller assigned it
+ title = "";
+ originalTitle = "";
+ tagline = "";
+ overview = "";
+ backdropPath = "";
+ posterPath = "";
+ adult = false;
+ collectionID = 0; // 0 is what Dump() treats as "no collection"
+ collectionName = "";
+ collectionPosterPath = "";
+ collectionBackdropPath = "";
+ budget = revenue = 0;
+ genres = "";
+ homepage = "";
+ imdbid = "";
+ releaseDate = "";
+ runtime = 0;
+ popularity = 0.0;
+ voteAverage = 0.0;
+ currentActor = 0; // GetActor() starts handing out actors from the front
+ backdropWidth = 1280;
+ backdropHeight = 720;
+ posterWidth = 500;
+ posterHeight = 750;
+}
+
+cMovieDbMovie::~cMovieDbMovie() { // the movie owns the actor objects stored in its list and frees them here
+ for (size_t idx = 0; idx < actors.size(); ++idx) {
+ delete actors[idx];
+ }
+ actors.clear();
+}
+
+void cMovieDbMovie::ParseJSON(void) { // fills the public fields from the stored movie-detail JSON; absent or wrongly typed fields keep their current value
+ cJsonLoader jMovie(json.c_str());
+ if (!jMovie.isObject()) { // not a JSON object: leave all fields as they are
+ return;
+ }
+ json_t *jTitle = jMovie.objectByName("title");
+ if(json_is_string(jTitle)) {
+ title = json_string_value(jTitle);
+ }
+ json_t *jOriginalTitle = jMovie.objectByName("original_title");
+ if(json_is_string(jOriginalTitle)) {
+ originalTitle = json_string_value(jOriginalTitle);
+ }
+ json_t *jOverview = jMovie.objectByName("overview");
+ if(json_is_string(jOverview)) {
+ overview = json_string_value(jOverview);
+ }
+ json_t *jBackdrop = jMovie.objectByName("backdrop_path");
+ if(json_is_string(jBackdrop)) {
+ backdropPath = backdropBaseUrl + json_string_value(jBackdrop); // needs SetBackdropBaseUrl() to have been called first
+ }
+ json_t *jPoster = jMovie.objectByName("poster_path");
+ if(json_is_string(jPoster)) {
+ posterPath = posterBaseUrl + json_string_value(jPoster); // needs SetPosterBaseUrl() to have been called first
+ }
+ json_t *jAdult = jMovie.objectByName("adult");
+ if(json_is_true(jAdult)) {
+ adult = true;
+ }
+ json_t *collection = jMovie.objectByName("belongs_to_collection");
+ if(json_is_object(collection)) {
+ json_t *colID = json_object_get(collection, "id");
+ if (json_is_integer(colID)) {
+ collectionID = (int)json_integer_value(colID);
+ }
+ json_t *colName = json_object_get(collection, "name");
+ if(json_is_string(colName)) { // was tested against jPoster, which let a NULL name reach the std::string assignment
+ collectionName = json_string_value(colName);
+ }
+ json_t *colPoster = json_object_get(collection, "poster_path");
+ if(json_is_string(colPoster)) {
+ collectionPosterPath = posterBaseUrl + json_string_value(colPoster);
+ }
+ json_t *colBackdrop = json_object_get(collection, "backdrop_path");
+ if(json_is_string(colBackdrop)) {
+ collectionBackdropPath = backdropBaseUrl + json_string_value(colBackdrop);
+ }
+ }
+ json_t *jBudget = jMovie.objectByName("budget");
+ if (json_is_integer(jBudget)) {
+ budget = (int)json_integer_value(jBudget); // NOTE(review): values above INT_MAX do not fit the int field
+ }
+ json_t *aGenres = jMovie.objectByName("genres");
+ if(json_is_array(aGenres)) {
+ size_t numGenres = json_array_size(aGenres);
+ for (size_t res = 0; res < numGenres; res++) {
+ json_t *result = json_array_get(aGenres, res);
+ if (json_is_object(result)) {
+ json_t *jGenre = json_object_get(result, "name");
+ if(json_is_string(jGenre)) {
+ genres += json_string_value(jGenre);
+ if ((res+1) < numGenres) // separator after every genre except the last array element
+ genres += " | ";
+ }
+ }
+ }
+ }
+ json_t *jHomepage = jMovie.objectByName("homepage");
+ if(json_is_string(jHomepage)) {
+ homepage = json_string_value(jHomepage);
+ }
+ json_t *jIMDB = jMovie.objectByName("imdb_id");
+ if(json_is_string(jIMDB)) {
+ imdbid = json_string_value(jIMDB);
+ }
+ json_t *jPopularity = jMovie.objectByName("popularity");
+ if(json_is_number(jPopularity)) { // accept integer-typed JSON numbers too, not only reals
+ popularity = json_number_value(jPopularity);
+ }
+ json_t *jReleaseDate = jMovie.objectByName("release_date");
+ if(json_is_string(jReleaseDate)) {
+ releaseDate = json_string_value(jReleaseDate);
+ }
+ json_t *jRevenue = jMovie.objectByName("revenue");
+ if (json_is_integer(jRevenue)) {
+ revenue = (int)json_integer_value(jRevenue); // NOTE(review): values above INT_MAX do not fit the int field
+ }
+ json_t *jRuntime = jMovie.objectByName("runtime");
+ if (json_is_integer(jRuntime)) {
+ runtime = (int)json_integer_value(jRuntime);
+ }
+ json_t *jVote = jMovie.objectByName("vote_average");
+ if(json_is_number(jVote)) { // a whole-number rating arrives as a JSON integer and used to be ignored
+ voteAverage = json_number_value(jVote);
+ }
+ json_t *jTagline = jMovie.objectByName("tagline");
+ if(json_is_string(jTagline)) {
+ tagline = json_string_value(jTagline);
+ }
+}
+
+int cMovieDbMovie::ParseJSONForMovieId(string movieSearchString) { // ranks the "results" of a search response by title distance; returns the best id, or -1 if none is usable
+ //convert searchstring to lower case
+ transform(movieSearchString.begin(), movieSearchString.end(), movieSearchString.begin(), ::tolower);
+ cJsonLoader root(json.c_str());
+ if (!root.isObject()) {
+ return -1;
+ }
+ json_t *results = root.objectByName("results");
+ if(!json_is_array(results)) {
+ return -1;
+ }
+ size_t numResults = json_array_size(results);
+ for (size_t res = 0; res < numResults; res++) {
+ json_t *result = json_array_get(results, res);
+ if (!json_is_object(result)) {
+ continue; // skip the malformed entry; returning -1 here threw away the valid candidates as well
+ }
+ json_t *title = json_object_get(result, "title");
+ if (!json_is_string(title)) {
+ continue; // an entry without a string title cannot be ranked, the others still can
+ }
+ string resultTitle = json_string_value(title);
+ //convert result to lower case
+ transform(resultTitle.begin(), resultTitle.end(), resultTitle.begin(), ::tolower);
+ json_t *jId = json_object_get(result, "id");
+ if (json_is_integer(jId)) { // entries without an integer id are left out of the ranking
+ int id = (int)json_integer_value(jId);
+ searchResult sRes;
+ sRes.id = id;
+ sRes.distance = sentence_distance(resultTitle, movieSearchString);
+ resultSet.push_back(sRes);
+ }
+ }
+ return FindBestResult(); // yields -1 when no candidate was collected
+}
+
+int cMovieDbMovie::FindBestResult(void) { // id of the entry with the smallest distance in resultSet, or -1 when the set is empty
+ int bestId = -1;
+ int smallestDistance = -1;
+ const int total = resultSet.size();
+ for (int pos = 0; pos < total; ++pos) {
+ const searchResult &candidate = resultSet[pos];
+ const bool isFirst = (pos == 0);
+ if (!isFirst && candidate.distance >= smallestDistance) {
+ continue; // not strictly better: on ties the earlier entry wins
+ }
+ smallestDistance = candidate.distance;
+ bestId = candidate.id;
+ }
+ return bestId;
+}
+
+void cMovieDbMovie::ReadActors(void) { // downloads the cast list for this movie's id and appends the parsed actors; does nothing if the download fails
+ stringstream castUrl;
+ castUrl << baseUrl << "/movie/" << id << "/casts?api_key=" << apiKey;
+ string castJSON;
+ if (!curl.GetUrl(castUrl.str().c_str(), &castJSON))
+ return;
+ cMovieDbActors castParser(castJSON, actorBaseUrl);
+ castParser.ParseJSON(&actors);
+}
+
+cMovieDBActor *cMovieDbMovie::GetActor(void) { // each call hands out the next stored actor; NULL once all were returned or none exist
+ const int total = actors.size();
+ const bool exhausted = (total < 1) || (currentActor >= total);
+ if (exhausted)
+ return NULL;
+ cMovieDBActor *next = actors[currentActor++]; // advance the cursor for the following call
+ return next;
+}
+
+void cMovieDbMovie::Dump(void) { // debugging aid: prints all movie fields and then every stored actor to stdout
+ cout << "-------------- MOVIE DUMP ---------------" << endl;
+ cout << "Title: " << title << endl;
+ cout << "Original Title: " << originalTitle << endl;
+ cout << "Tagline: " << tagline << endl;
+ cout << "Overview: " << overview << endl;
+ cout << "BackdropPath: " << backdropPath << endl;
+ cout << "PosterPath: " << posterPath << endl;
+ cout << "Adult: " << (adult ? "true" : "false") << endl;
+ if (collectionID != 0) { // a non-zero id means collection data was parsed
+ cout << "CollectionID: " << collectionID << endl;
+ cout << "Collection Name: " << collectionName << endl;
+ cout << "Collection BackdropPath: " << collectionBackdropPath << endl;
+ cout << "Collection PosterPath: " << collectionPosterPath << endl;
+ } else {
+ cout << "Movie belongs not to a collection" << endl;
+ }
+ cout << "Budget: " << budget << "$" << endl;
+ cout << "Revenue: " << revenue << "$" << endl;
+ cout << "Genres: " << genres << endl;
+ cout << "Homepage: " << homepage << endl;
+ cout << "imdbID: " << imdbid << endl;
+ cout << "Release Date: " << releaseDate << endl;
+ cout << "Runtime: " << runtime << " min" << endl;
+ cout << "Popularity: " << popularity << endl;
+ cout << "Vote Average: " << voteAverage << endl;
+ const int actorCount = actors.size();
+ cout << "--------------------- " << actorCount << " actors found -----------------" << endl;
+ for (vector<cMovieDBActor*>::const_iterator it = actors.begin(); it != actors.end(); ++it) {
+ (*it)->Dump();
+ }
+}
diff --git a/scraper/themoviedbscraper/moviedbmovie.h b/scraper/themoviedbscraper/moviedbmovie.h
new file mode 100644
index 0000000..9e9c2fd
--- /dev/null
+++ b/scraper/themoviedbscraper/moviedbmovie.h
@@ -0,0 +1,68 @@
+#ifndef __TVSCRAPER_MOVIEDBMOVIE_H
+#define __TVSCRAPER_MOVIEDBMOVIE_H
+
+#include "moviedbactor.h"
+
+using namespace std;
+
+struct searchResult { // one candidate taken from a movie search response
+ int id; // value of the candidate's "id" field
+ int distance; // sentence_distance() between lowercased result title and search string; the smallest value wins
+};
+
+// --- cMovieDbMovie -------------------------------------------------------------
+
+class cMovieDbMovie { // one movie: parses search/detail JSON and holds the extracted fields plus the cast list
+private:
+ string json; // raw JSON text passed to the constructor
+ vector<searchResult> resultSet; // candidates collected by ParseJSONForMovieId()
+ string apiKey; // sent as api_key= when ReadActors() builds its URL
+ string baseUrl; // API prefix used by ReadActors()
+ string posterBaseUrl; // prepended to poster paths in ParseJSON()
+ string backdropBaseUrl; // prepended to backdrop paths in ParseJSON()
+ string actorBaseUrl; // handed to cMovieDbActors for actor thumbnail URLs
+ vector<cMovieDBActor*> actors; // owned by this object; deleted in the destructor
+ int currentActor; // index of the actor the next GetActor() call returns
+ int FindBestResult(void); // id with the smallest distance in resultSet, -1 if it is empty
+public:
+ cMovieDbMovie(string json);
+ virtual ~cMovieDbMovie(void);
+ int id; // assigned by cMovieDBScraper::ReadMovie(); used by ReadActors() to build the cast URL
+ string title;
+ string originalTitle;
+ string tagline;
+ string overview;
+ string backdropPath; // backdropBaseUrl + the JSON "backdrop_path"
+ int backdropWidth; // set to 1280 by the constructor
+ int backdropHeight; // set to 720 by the constructor
+ string posterPath; // posterBaseUrl + the JSON "poster_path"
+ int posterWidth; // set to 500 by the constructor
+ int posterHeight; // set to 750 by the constructor
+ bool adult;
+ int collectionID; // stays 0 unless "belongs_to_collection" carries an integer id
+ string collectionName;
+ string collectionPosterPath;
+ string collectionBackdropPath;
+ int budget; // Dump() prints this with a "$" suffix
+ int revenue; // Dump() prints this with a "$" suffix
+ string genres; // genre names joined with " | "
+ string homepage;
+ string imdbid;
+ string releaseDate; // stored as the string found in "release_date", not parsed further
+ int runtime; // Dump() prints this with a " min" suffix
+ float popularity;
+ float voteAverage;
+ int ParseJSONForMovieId(string movieSearchString); // for search responses: id of the closest title match, or -1
+ void ParseJSON(void); // for detail responses: fills the public fields; the base URL setters must be called first
+ void SetApiKey(string apiKey) { this->apiKey = apiKey; };
+ void SetBaseUrl(string baseUrl) { this->baseUrl = baseUrl; };
+ void SetPosterBaseUrl(string url) { posterBaseUrl = url; };
+ void SetBackdropBaseUrl(string url) { backdropBaseUrl = url; };
+ void SetActorBaseUrl(string url) { actorBaseUrl = url; };
+ void ReadActors(void); // downloads the cast list and appends the parsed actors to the internal list
+ cMovieDBActor *GetActor(void); // next actor on each call, NULL when none are left; the movie keeps ownership
+ void Dump(); // prints all fields and actors to stdout
+};
+
+
+#endif //__TVSCRAPER_MOVIEDBMOVIE_H
diff --git a/scraper/themoviedbscraper/themoviedbscraper.c b/scraper/themoviedbscraper/themoviedbscraper.c
new file mode 100644
index 0000000..62263a9
--- /dev/null
+++ b/scraper/themoviedbscraper/themoviedbscraper.c
@@ -0,0 +1,103 @@
+
+#include <string>
+#include <sstream>
+#include <vector>
+#include <map>
+#include <algorithm>
+#include <iostream>
+#include <jansson.h>
+
+#include "../../lib/curl.h"
+#include "../../tools/stringhelpers.h"
+#include "themoviedbscraper.h"
+
+using namespace std;
+
+cMovieDBScraper::cMovieDBScraper(string language) // fixed API endpoint and image sizes; only the language is configurable
+ : apiKey("abb01b5a277b9c2c60ec0302d83c5ee9"), // NOTE(review): key is hard-coded in the source -- confirm this is intended
+ language(language), // sent as language= with search and detail requests
+ baseURL("api.themoviedb.org/3"),
+ posterSize("w500"), // size segments appended to the image base URL in ReadMovie()
+ backdropSize("w1280"),
+ actorthumbSize("h632") {
+}
+
+cMovieDBScraper::~cMovieDBScraper() { // intentionally empty: no cleanup code is needed here
+}
+
+cMovieDbMovie *cMovieDBScraper::Scrap(string movieName, string year) { // searches by title (and optional year), then loads the best match; NULL on any failure
+ const int movieID = SearchMovie(movieName, year);
+ const bool found = (movieID >= 1);
+ if (!found)
+ return NULL;
+ // ReadMovie() itself yields NULL when the download fails,
+ // so its result can be handed straight to the caller.
+ cMovieDbMovie *movie = ReadMovie(movieID);
+ return movie;
+}
+
+bool cMovieDBScraper::Connect(void) { // fetches the API configuration and stores the image base URL; false if the download or parsing fails
+ stringstream configUrl;
+ configUrl << baseURL << "/configuration?api_key=" << apiKey;
+ string configJSON;
+ const bool fetched = curl.GetUrl(configUrl.str().c_str(), &configJSON);
+ if (!fetched)
+ return false;
+ return parseJSON(configJSON);
+}
+
+bool cMovieDBScraper::parseJSON(string jsonString) { // extracts images.base_url from a configuration response; false if any level is missing or wrongly typed
+ cJsonLoader root(jsonString.c_str());
+ if (!root.isObject())
+ return false;
+
+ json_t *images = root.objectByName("images");
+ if (!json_is_object(images))
+ return false;
+
+ json_t *baseUrlNode = json_object_get(images, "base_url");
+ if (!json_is_string(baseUrlNode))
+ return false;
+
+ // Remember the prefix; ReadMovie() appends the size segments
+ // (poster, backdrop, actor thumbnail) to it.
+ imageUrl = json_string_value(baseUrlNode);
+ return true;
+}
+
+int cMovieDBScraper::SearchMovie(string movieName, string year) { // runs a movie search and returns the id of the closest title match; -1 if the download fails or nothing matches
+ char *escapedName = curl.EscapeUrl(movieName.c_str()); // NOTE(review): result is streamed unchecked -- confirm EscapeUrl() never returns NULL
+ stringstream searchUrl;
+ searchUrl << baseURL << "/search/movie?api_key=" << apiKey << "&query=" << escapedName << "&language=" << language.c_str();
+ curl.Free(escapedName); // the escaped copy is not needed after it has been streamed
+
+ if (!year.empty())
+ searchUrl << "&year=" << year.c_str();
+
+ string movieJSON;
+ if (!curl.GetUrl(searchUrl.str().c_str(), &movieJSON))
+ return -1;
+
+ // The search response is wrapped in a temporary movie object
+ // purely to reuse its result-ranking parser.
+ cMovieDbMovie searchResults(movieJSON);
+ return searchResults.ParseJSONForMovieId(movieName);
+}
+
+cMovieDbMovie *cMovieDBScraper::ReadMovie(int movieID) { // downloads and parses the details of one movie; returns a new'd object the caller must delete, or NULL if the download fails
+ stringstream detailUrl;
+ detailUrl << baseURL << "/movie/" << movieID << "?api_key=" << apiKey << "&language=" << language.c_str();
+ string movieJSON;
+ if (!curl.GetUrl(detailUrl.str().c_str(), &movieJSON))
+ return NULL;
+ cMovieDbMovie *movie = new cMovieDbMovie(movieJSON);
+ movie->id = movieID;
+ movie->SetBaseUrl(baseURL);
+ movie->SetApiKey(apiKey);
+ // Base URLs must be in place before ParseJSON() builds the image paths.
+ movie->SetPosterBaseUrl(imageUrl + posterSize);
+ movie->SetBackdropBaseUrl(imageUrl + backdropSize);
+ movie->SetActorBaseUrl(imageUrl + actorthumbSize);
+ movie->ParseJSON();
+ return movie;
+}
diff --git a/scraper/themoviedbscraper/themoviedbscraper.h b/scraper/themoviedbscraper/themoviedbscraper.h
new file mode 100644
index 0000000..d2f449e
--- /dev/null
+++ b/scraper/themoviedbscraper/themoviedbscraper.h
@@ -0,0 +1,31 @@
+#ifndef __TVSCRAPER_MOVIEDBSCRAPER_H
+#define __TVSCRAPER_MOVIEDBSCRAPER_H
+
+#include "moviedbmovie.h"
+#include "moviedbactor.h"
+
+using namespace std;
+
+// --- cMovieDBScraper -------------------------------------------------------------
+
+class cMovieDBScraper { // entry point of the scraper: searches movies and loads their details
+private:
+ string apiKey; // sent as api_key= with every request
+ string language; // sent as language= with search and detail requests
+ string baseURL; // API prefix all request URLs start with
+ string imageUrl; // image base URL read from the configuration response by parseJSON()
+ string posterSize; // size segment appended to imageUrl for posters
+ string backdropSize; // size segment appended to imageUrl for backdrops
+ string actorthumbSize; // size segment appended to imageUrl for actor thumbnails
+ bool parseJSON(string jsonString); // extracts images.base_url from the configuration response
+ int SearchMovie(string movieName, string year); // id of the best title match, or -1
+public:
+ cMovieDBScraper(string language);
+ virtual ~cMovieDBScraper(void);
+ bool Connect(void); // loads the API configuration; imageUrl is only filled in when this succeeds
+ cMovieDbMovie *Scrap(string movieName, string year=""); // search + load; returns a new'd movie or NULL
+ cMovieDbMovie *ReadMovie(int movieID); // loads one movie by id; returns a new'd movie or NULL
+};
+
+
+#endif //__TVSCRAPER_MOVIEDBSCRAPER_H