git init1.1.103

author: horchi <vdr@jwendel.de> 2017-03-05 16:39:28 +0100
committer: horchi <vdr@jwendel.de> 2017-03-05 16:39:28 +0100
commit: e2a48d8701f91b8e24fbe9e99e91eb72a87bb749 (patch)
tree: 726f70554b4ca985a09ef6e30a7fdc8df089993c /scraper/themoviedbscraper
download: vdr-epg-daemon-e2a48d8701f91b8e24fbe9e99e91eb72a87bb749.tar.gz
vdr-epg-daemon-e2a48d8701f91b8e24fbe9e99e91eb72a87bb749.tar.bz2
6 files changed, 550 insertions, 0 deletions
diff --git a/scraper/themoviedbscraper/moviedbactor.c b/scraper/themoviedbscraper/moviedbactor.c
new file mode 100644
index 0000000..4e81af2
--- /dev/null
+++ b/scraper/themoviedbscraper/moviedbactor.c
@@ -0,0 +1,49 @@
+
+#include <string>
+#include <sstream>
+#include <vector>
+#include <iostream>
+#include <jansson.h>
+#include "moviedbactor.h"
+
+#include "../../tools/stringhelpers.h"
+
+using namespace std;
+
+cMovieDbActors::cMovieDbActors(string json, string actorsBaseUrl)
+    : json(json), actorsBaseUrl(actorsBaseUrl) {
+    // Both arguments are copied into the members of the same name.
+}
+
+// Nothing to release explicitly; the string members clean up after themselves.
+cMovieDbActors::~cMovieDbActors() {}
+
+// Appends one cMovieDBActor (allocated with new, not freed here) per usable "cast" entry to *actors.
+void cMovieDbActors::ParseJSON(vector<cMovieDBActor*> *actors) {
+    cJsonLoader jActors(json.c_str());
+    if (!jActors.isObject())
+        return;
+    json_t *cast = jActors.objectByName("cast");
+    if (!json_is_array(cast))
+        return;
+    size_t numActors = json_array_size(cast);
+    for (size_t i = 0; i < numActors; i++) {
+        json_t *jActor = json_array_get(cast, i);
+        // Skip a malformed entry instead of returning, so the rest of the cast is still read.
+        if (!json_is_object(jActor))
+            continue;
+        json_t *jId = json_object_get(jActor, "id");
+        json_t *jName = json_object_get(jActor, "name");
+        json_t *jRole = json_object_get(jActor, "character");
+        json_t *jPath = json_object_get(jActor, "profile_path");
+        // An entry with a missing or mistyped field (e.g. a non-string "profile_path") is skipped on its own.
+        if (!json_is_integer(jId) || !json_is_string(jName) || !json_is_string(jRole) || !json_is_string(jPath))
+            continue;
+        cMovieDBActor *actor = new cMovieDBActor();
+        actor->id = (int)json_integer_value(jId);
+        actor->name = json_string_value(jName);
+        actor->role = json_string_value(jRole);
+        actor->thumbUrl = actorsBaseUrl + json_string_value(jPath);
+        actors->push_back(actor);
+    }
+}
diff --git a/scraper/themoviedbscraper/moviedbactor.h b/scraper/themoviedbscraper/moviedbactor.h
new file mode 100644
index 0000000..1b51682
--- /dev/null
+++ b/scraper/themoviedbscraper/moviedbactor.h
@@ -0,0 +1,44 @@
+#ifndef __TVSCRAPER_MOVIEDBACTORS_H
+#define __TVSCRAPER_MOVIEDBACTORS_H
+
+using namespace std;
+
+// --- cMovieDBActor -------------------------------------------------------------
+class cMovieDBActor {
+public:
+    // Starts with id 0, empty strings and a 370x556 thumbnail size.
+    cMovieDBActor(void)
+        : id(0),
+          thumbUrl(""),
+          width(370), height(556),
+          name(""),
+          role("") {}
+    int id;
+    string thumbUrl;
+    int width;
+    int height;
+    string name;
+    string role;
+    // Prints id, name, role and thumbnail URL to stdout, one per line.
+    void Dump(void) {
+        cout << "id: " << id << endl
+             << "name: " << name << endl
+             << "role: " << role << endl
+             << "thumbnail: " << thumbUrl << endl;
+    }
+};
+
+// --- cMovieDBActors -------------------------------------------------------------
+
+class cMovieDbActors {
+public:
+    cMovieDbActors(string json, string actorsBaseUrl); // stores both arguments
+    virtual ~cMovieDbActors(void);
+    void ParseJSON(vector<cMovieDBActor*> *actors);    // fills *actors from the "cast" array
+private:
+    string json;          // raw JSON text handed to the constructor
+    string actorsBaseUrl; // prefix prepended to each "profile_path"
+};
+
+
+#endif //__TVSCRAPER_MOVIEDBACTORS_H
diff --git a/scraper/themoviedbscraper/moviedbmovie.c b/scraper/themoviedbscraper/moviedbmovie.c
new file mode 100644
index 0000000..8060930
--- /dev/null
+++ b/scraper/themoviedbscraper/moviedbmovie.c
@@ -0,0 +1,255 @@
+
+#include <string>
+#include <sstream>
+#include <vector>
+#include <algorithm>
+#include <iostream>
+#include <jansson.h>
+
+#include "../../lib/curl.h"
+#include "../../tools/fuzzy.h"
+#include "../../tools/stringhelpers.h"
+#include "moviedbmovie.h"
+
+using namespace std;
+
+// Stores the JSON text and resets every field to its "nothing parsed yet" default.
+cMovieDbMovie::cMovieDbMovie(string json) {
+    this->json = json;
+    id = 0; // previously left uninitialised; ids below 1 are treated as "no movie" by cMovieDBScraper::Scrap()
+    title = "";
+    originalTitle = "";
+    tagline = "";
+    overview = "";
+    backdropPath = "";
+    posterPath = "";
+    adult = false;
+    collectionID = 0;
+    collectionName = "";
+    collectionPosterPath = "";
+    collectionBackdropPath = "";
+    budget = 0;
+    revenue = 0;
+    genres = "";
+    homepage = "";
+    imdbid = "";
+    releaseDate = "";
+    runtime = 0;
+    popularity = 0.0;
+    voteAverage = 0.0;
+    currentActor = 0;
+    backdropWidth = 1280; backdropHeight = 720;
+    posterWidth = 500; posterHeight = 750;
+}
+
+// Frees every actor object held in the vector, then empties the vector.
+cMovieDbMovie::~cMovieDbMovie() {
+    for (size_t idx = 0; idx < actors.size(); ++idx)
+        delete actors[idx];
+    actors.clear();
+}
+
+// Copies the fields of the stored movie JSON into the members; absent or mistyped fields keep their defaults.
+void cMovieDbMovie::ParseJSON(void) {
+    cJsonLoader jMovie(json.c_str());
+    if (!jMovie.isObject())
+        return;
+    json_t *jTitle = jMovie.objectByName("title");
+    if(json_is_string(jTitle)) {
+        title = json_string_value(jTitle);
+    }
+    json_t *jOriginalTitle = jMovie.objectByName("original_title");
+    if(json_is_string(jOriginalTitle)) {
+        originalTitle = json_string_value(jOriginalTitle);
+    }
+    json_t *jOverview = jMovie.objectByName("overview");
+    if(json_is_string(jOverview)) {
+        overview = json_string_value(jOverview);
+    }
+    json_t *jBackdrop = jMovie.objectByName("backdrop_path");
+    if(json_is_string(jBackdrop)) {
+        backdropPath = backdropBaseUrl + json_string_value(jBackdrop);
+    }
+    json_t *jPoster = jMovie.objectByName("poster_path");
+    if(json_is_string(jPoster)) {
+        posterPath = posterBaseUrl + json_string_value(jPoster);
+    }
+    json_t *jAdult = jMovie.objectByName("adult");
+    if(json_is_true(jAdult)) {
+        adult = true;
+    }
+    json_t *collection = jMovie.objectByName("belongs_to_collection");
+    if(json_is_object(collection)) {
+        json_t *colID = json_object_get(collection, "id");
+        if (json_is_integer(colID)) {
+            collectionID = (int)json_integer_value(colID);
+        }
+        json_t *colName = json_object_get(collection, "name");
+        if(json_is_string(colName)) { // was testing jPoster, which let a non-string name reach the assignment
+            collectionName = json_string_value(colName);
+        }
+        json_t *colPoster = json_object_get(collection, "poster_path");
+        if(json_is_string(colPoster)) {
+            collectionPosterPath = posterBaseUrl + json_string_value(colPoster);
+        }
+        json_t *colBackdrop = json_object_get(collection, "backdrop_path");
+        if(json_is_string(colBackdrop)) {
+            collectionBackdropPath = backdropBaseUrl + json_string_value(colBackdrop);
+        }
+    }
+    json_t *jBudget = jMovie.objectByName("budget");
+    if (json_is_integer(jBudget)) {
+        budget = (int)json_integer_value(jBudget);
+    }
+    json_t *aGenres = jMovie.objectByName("genres");
+    if(json_is_array(aGenres)) {
+        size_t numGenres = json_array_size(aGenres);
+        for (size_t res = 0; res < numGenres; res++) {
+            json_t *result = json_array_get(aGenres, res);
+            if (json_is_object(result)) {
+                json_t *jGenre = json_object_get(result, "name");
+                if(json_is_string(jGenre)) {
+                    if (!genres.empty()) // separator goes in front, so a skipped last entry leaves no trailing " | "
+                        genres += " | ";
+                    genres += json_string_value(jGenre);
+                }
+            }
+        }
+    }
+    json_t *jHomepage = jMovie.objectByName("homepage");
+    if(json_is_string(jHomepage)) {
+        homepage = json_string_value(jHomepage);
+    }
+    json_t *jIMDB = jMovie.objectByName("imdb_id");
+    if(json_is_string(jIMDB)) {
+        imdbid = json_string_value(jIMDB);
+    }
+    json_t *jPopularity = jMovie.objectByName("popularity");
+    if(json_is_number(jPopularity)) { // accept integer-valued JSON numbers too, not only reals
+        popularity = json_number_value(jPopularity);
+    }
+    json_t *jReleaseDate = jMovie.objectByName("release_date");
+    if(json_is_string(jReleaseDate)) {
+        releaseDate = json_string_value(jReleaseDate);
+    }
+    json_t *jRevenue = jMovie.objectByName("revenue");
+    if (json_is_integer(jRevenue)) {
+        revenue = (int)json_integer_value(jRevenue); // NOTE(review): value is narrowed to int here - confirm the range suffices
+    }
+    json_t *jRuntime = jMovie.objectByName("runtime");
+    if (json_is_integer(jRuntime)) {
+        runtime = (int)json_integer_value(jRuntime);
+    }
+    json_t *jVote = jMovie.objectByName("vote_average");
+    if(json_is_number(jVote)) { // a whole-number rating such as 7 is an integer in JSON
+        voteAverage = json_number_value(jVote);
+    }
+    json_t *jTagline = jMovie.objectByName("tagline");
+    if(json_is_string(jTagline)) {
+        tagline = json_string_value(jTagline);
+    }
+}
+
+// Scores every entry of the "results" array against movieSearchString and returns
+// the id chosen by FindBestResult(). Returns -1 when the JSON has no usable
+// "results" array, or as soon as an entry is not an object with a string "title".
+int cMovieDbMovie::ParseJSONForMovieId(string movieSearchString) {
+    // lower-case the search string so the comparison ignores case
+    transform(movieSearchString.begin(), movieSearchString.end(), movieSearchString.begin(), ::tolower);
+    cJsonLoader root(json.c_str());
+    if (!root.isObject())
+        return -1;
+    json_t *results = root.objectByName("results");
+    if (!json_is_array(results))
+        return -1;
+
+    const size_t total = json_array_size(results);
+    for (size_t idx = 0; idx < total; ++idx) {
+        json_t *entry = json_array_get(results, idx);
+        if (!json_is_object(entry))
+            return -1;
+        json_t *jTitle = json_object_get(entry, "title");
+        if (!json_is_string(jTitle))
+            return -1;
+        // lower-case the candidate title as well
+        string candidate = json_string_value(jTitle);
+        transform(candidate.begin(), candidate.end(), candidate.begin(), ::tolower);
+        json_t *jId = json_object_get(entry, "id");
+        // entries without an integer "id" are skipped, not treated as an error
+        if (!json_is_integer(jId))
+            continue;
+        searchResult hit;
+        hit.id = (int)json_integer_value(jId);
+        hit.distance = sentence_distance(candidate, movieSearchString);
+        resultSet.push_back(hit);
+    }
+    return FindBestResult();
+}
+
+// Returns the id of the entry with the smallest distance (the earliest one on
+// ties), or -1 when resultSet is empty.
+int cMovieDbMovie::FindBestResult(void) {
+    if (resultSet.empty())
+        return -1;
+    int bestId = resultSet[0].id;
+    int lowest = resultSet[0].distance;
+    for (size_t idx = 1; idx < resultSet.size(); ++idx) {
+        if (resultSet[idx].distance < lowest) {
+            lowest = resultSet[idx].distance;
+            bestId = resultSet[idx].id;
+        }
+    }
+    return bestId;
+}
+
+// Fetches "<baseUrl>/movie/<id>/casts" and lets cMovieDbActors append to `actors`.
+void cMovieDbMovie::ReadActors(void) {
+    stringstream url;
+    url << baseUrl << "/movie/" << id << "/casts?api_key=" << apiKey;
+    string actorsJSON;
+    if (!curl.GetUrl(url.str().c_str(), &actorsJSON))
+        return;
+    cMovieDbActors(actorsJSON, actorBaseUrl).ParseJSON(&actors);
+}
+
+// Hands out the stored actors one per call; NULL once all have been returned.
+// The returned object is still deleted by this movie's destructor.
+cMovieDBActor *cMovieDbMovie::GetActor(void) {
+    const int total = actors.size();
+    if (total >= 1 && currentActor < total)
+        return actors[currentActor++];
+    return NULL;
+}
+
+// Prints all parsed movie fields, followed by every loaded actor, to stdout.
+void cMovieDbMovie::Dump(void) {
+    cout << "-------------- MOVIE DUMP ---------------" << endl
+         << "Title: " << title << endl
+         << "Original Title: " << originalTitle << endl
+         << "Tagline: " << tagline << endl
+         << "Overview: " << overview << endl
+         << "BackdropPath: " << backdropPath << endl
+         << "PosterPath: " << posterPath << endl
+         << "Adult: " << (adult?"true":"false") << endl;
+    if (collectionID) {
+        cout << "CollectionID: " << collectionID << endl
+             << "Collection Name: " << collectionName << endl
+             << "Collection BackdropPath: " << collectionBackdropPath << endl
+             << "Collection PosterPath: " <<  collectionPosterPath << endl;
+    } else {
+        cout << "Movie belongs not to a collection" << endl;
+    }
+    cout << "Budget: " << budget << "$" << endl
+         << "Revenue: " << revenue << "$" << endl
+         << "Genres: " << genres << endl
+         << "Homepage: " << homepage << endl
+         << "imdbID: " << imdbid << endl
+         << "Release Date: " << releaseDate << endl
+         << "Runtime: " << runtime << " min" << endl
+         << "Popularity: " << popularity << endl
+         << "Vote Average: " << voteAverage << endl;
+    const int actorCount = actors.size();
+    cout << "--------------------- " << actorCount << " actors found -----------------" << endl;
+    for (int idx = 0; idx < actorCount; ++idx)
+        actors[idx]->Dump();
+}
diff --git a/scraper/themoviedbscraper/moviedbmovie.h b/scraper/themoviedbscraper/moviedbmovie.h
new file mode 100644
index 0000000..9e9c2fd
--- /dev/null
+++ b/scraper/themoviedbscraper/moviedbmovie.h
@@ -0,0 +1,68 @@
+#ifndef __TVSCRAPER_MOVIEDBMOVIE_H
+#define __TVSCRAPER_MOVIEDBMOVIE_H
+
+#include "moviedbactor.h"
+
+using namespace std;
+
+struct searchResult {
+    int id;       // "id" of one search hit
+    int distance; // sentence_distance() between the hit's title and the search string; FindBestResult() prefers the smallest
+};
+
+// --- cMovieDbMovie -------------------------------------------------------------
+
+class cMovieDbMovie {
+private:
+    string json;                    // raw JSON text passed to the constructor
+    vector<searchResult> resultSet; // candidates collected by ParseJSONForMovieId()
+    string apiKey;                  // used by ReadActors() to build the request URL
+    string baseUrl;                 // used by ReadActors() to build the request URL
+    string posterBaseUrl;           // prefix prepended to poster paths in ParseJSON()
+    string backdropBaseUrl;         // prefix prepended to backdrop paths in ParseJSON()
+    string actorBaseUrl;            // handed to cMovieDbActors in ReadActors()
+    vector<cMovieDBActor*> actors;  // filled by ReadActors(); elements are deleted in the destructor
+    int currentActor;               // index of the actor the next GetActor() call returns
+    int FindBestResult(void);       // id with the smallest distance in resultSet, -1 if it is empty
+public:
+    cMovieDbMovie(string json);
+    virtual ~cMovieDbMovie(void);
+    int id;                         // assigned by cMovieDBScraper::ReadMovie(); part of the ReadActors() URL
+    string title;
+    string originalTitle;
+    string tagline;    
+    string overview;
+    string backdropPath;            // backdropBaseUrl + "backdrop_path"
+    int backdropWidth;              // set to 1280 by the constructor
+    int backdropHeight;             // set to 720 by the constructor
+    string posterPath;              // posterBaseUrl + "poster_path"
+    int posterWidth;                // set to 500 by the constructor
+    int posterHeight;               // set to 750 by the constructor
+    bool adult;
+    int collectionID;               // 0 means "no collection" (see Dump())
+    string collectionName;
+    string collectionPosterPath;
+    string collectionBackdropPath;
+    int budget;
+    int revenue;
+    string genres;                  // genre names joined with " | "
+    string homepage;
+    string imdbid;
+    string releaseDate;
+    int runtime;                    // printed with the unit "min" by Dump()
+    float popularity;
+    float voteAverage;
+    int ParseJSONForMovieId(string movieSearchString); // best matching id from a search response, or -1
+    void ParseJSON(void);                              // fills the public fields from a movie-details response
+    void SetApiKey(string apiKey) { this->apiKey = apiKey; };
+    void SetBaseUrl(string baseUrl) { this->baseUrl = baseUrl; };
+    void SetPosterBaseUrl(string url) { posterBaseUrl = url; };
+    void SetBackdropBaseUrl(string url) { backdropBaseUrl = url; };
+    void SetActorBaseUrl(string url) { actorBaseUrl = url; };
+    void ReadActors(void);          // downloads the cast list and appends it to `actors`
+    cMovieDBActor *GetActor(void);  // next actor per call, NULL when exhausted
+    void Dump();                    // prints all fields and actors to stdout
+};
+
+
+#endif //__TVSCRAPER_MOVIEDBMOVIE_H
diff --git a/scraper/themoviedbscraper/themoviedbscraper.c b/scraper/themoviedbscraper/themoviedbscraper.c
new file mode 100644
index 0000000..62263a9
--- /dev/null
+++ b/scraper/themoviedbscraper/themoviedbscraper.c
@@ -0,0 +1,103 @@
+
+#include <string>
+#include <sstream>
+#include <vector>
+#include <map>
+#include <algorithm>
+#include <iostream>
+#include <jansson.h>
+
+#include "../../lib/curl.h"
+#include "../../tools/stringhelpers.h"
+#include "themoviedbscraper.h"
+
+using namespace std;
+
+// Sets the built-in API key, the API host/path and the three image size selectors.
+cMovieDBScraper::cMovieDBScraper(string language)
+    : apiKey("abb01b5a277b9c2c60ec0302d83c5ee9"),
+      language(language),
+      baseURL("api.themoviedb.org/3"),
+      posterSize("w500"),
+      backdropSize("w1280"),
+      actorthumbSize("h632") {}
+
+// Nothing to release explicitly; all members are strings.
+cMovieDBScraper::~cMovieDBScraper() {}
+
+// Looks the movie up by name (and optional year) and loads its details.
+// Returns NULL when the search yields no id >= 1 or the details cannot be
+// fetched; otherwise an object allocated with new that is not freed here.
+cMovieDbMovie *cMovieDBScraper::Scrap(string movieName, string year) {
+    const int movieID = SearchMovie(movieName, year);
+    if (movieID < 1)
+        return NULL;
+    // ReadMovie() already yields NULL on failure, so its result is passed through.
+    return ReadMovie(movieID);
+}
+
+// Downloads "<baseURL>/configuration" and hands it to parseJSON(); false if either step fails.
+bool cMovieDBScraper::Connect(void) {
+    stringstream url;
+    url << baseURL << "/configuration?api_key=" << apiKey;
+    string configJSON;
+    if (!curl.GetUrl(url.str().c_str(), &configJSON))
+        return false;
+    return parseJSON(configJSON);
+}
+
+// Reads images.base_url from the configuration JSON into imageUrl.
+// Returns false, leaving imageUrl untouched, if any level is missing or mistyped.
+bool cMovieDBScraper::parseJSON(string jsonString) {
+    cJsonLoader root(jsonString.c_str());
+    if (!root.isObject())
+        return false;
+
+    json_t *images = root.objectByName("images");
+    if (!json_is_object(images))
+        return false;
+
+    json_t *baseUrlNode = json_object_get(images, "base_url");
+    if (!json_is_string(baseUrlNode))
+        return false;
+
+    imageUrl = json_string_value(baseUrlNode);
+    return true;
+}
+
+// Queries "<baseURL>/search/movie" for movieName (optionally restricted to year).
+// Returns the id chosen by ParseJSONForMovieId(), or -1 if the request fails.
+int cMovieDBScraper::SearchMovie(string movieName, string year) {
+    stringstream url;
+    char* escapedName = curl.EscapeUrl(movieName.c_str());
+    url << baseURL << "/search/movie?api_key=" << apiKey << "&query=" << escapedName << "&language=" << language.c_str();
+    curl.Free(escapedName);   // the escaped copy is not needed once it has been streamed
+    if (!year.empty())
+        url << "&year=" << year.c_str();
+
+    string movieJSON;
+    if (!curl.GetUrl(url.str().c_str(), &movieJSON))
+        return -1;
+
+    // A throw-away movie object is used only for its search-result parser.
+    cMovieDbMovie movie(movieJSON);
+    return movie.ParseJSONForMovieId(movieName);
+}
+
+// Downloads the details of movieID into a new, already parsed cMovieDbMovie; NULL if the download fails.
+cMovieDbMovie *cMovieDBScraper::ReadMovie(int movieID) {
+    stringstream url;
+    url << baseURL << "/movie/" << movieID << "?api_key=" << apiKey << "&language=" << language.c_str();
+    string movieJSON;
+    if (!curl.GetUrl(url.str().c_str(), &movieJSON))
+        return NULL;
+    cMovieDbMovie *movie = new cMovieDbMovie(movieJSON);
+    movie->id = movieID;
+    movie->SetBaseUrl(baseURL);
+    movie->SetApiKey(apiKey);
+    movie->SetPosterBaseUrl(imageUrl + posterSize);
+    movie->SetBackdropBaseUrl(imageUrl + backdropSize);
+    movie->SetActorBaseUrl(imageUrl + actorthumbSize);
+    movie->ParseJSON();   // uses the poster/backdrop base URLs set above
+    return movie;
+}
diff --git a/scraper/themoviedbscraper/themoviedbscraper.h b/scraper/themoviedbscraper/themoviedbscraper.h
new file mode 100644
index 0000000..d2f449e
--- /dev/null
+++ b/scraper/themoviedbscraper/themoviedbscraper.h
@@ -0,0 +1,31 @@
+#ifndef __TVSCRAPER_MOVIEDBSCRAPER_H
+#define __TVSCRAPER_MOVIEDBSCRAPER_H
+
+#include "moviedbmovie.h"
+#include "moviedbactor.h"
+
+using namespace std;
+
+// --- cMovieDBScraper -------------------------------------------------------------
+
+class cMovieDBScraper {
+private:
+    string apiKey;          // sent as "api_key" with every request
+    string language;        // sent as "language" with search and detail requests
+    string baseURL;         // API host and path every request URL starts with
+    string imageUrl;        // images.base_url read by parseJSON(); empty until Connect() succeeds
+    string posterSize;      // appended to imageUrl to form the poster base URL
+    string backdropSize;    // appended to imageUrl to form the backdrop base URL
+    string actorthumbSize;  // appended to imageUrl to form the actor thumbnail base URL
+    bool parseJSON(string jsonString);               // extracts imageUrl from the configuration response
+    int SearchMovie(string movieName, string year);  // best matching movie id, or -1
+public:
+    cMovieDBScraper(string language);
+    virtual ~cMovieDBScraper(void);
+    bool Connect(void);                                      // fetches the configuration; false on failure
+    cMovieDbMovie *Scrap(string movieName, string year="");  // search + ReadMovie(); NULL if nothing was found
+    cMovieDbMovie *ReadMovie(int movieID);                   // new cMovieDbMovie for movieID, NULL if the download fails
+};
+
+
+#endif //__TVSCRAPER_MOVIEDBSCRAPER_H
author	horchi <vdr@jwendel.de>	2017-03-05 16:39:28 +0100
committer	horchi <vdr@jwendel.de>	2017-03-05 16:39:28 +0100
commit	e2a48d8701f91b8e24fbe9e99e91eb72a87bb749 (patch)
tree	726f70554b4ca985a09ef6e30a7fdc8df089993c /scraper/themoviedbscraper
download	vdr-epg-daemon-e2a48d8701f91b8e24fbe9e99e91eb72a87bb749.tar.gz vdr-epg-daemon-e2a48d8701f91b8e24fbe9e99e91eb72a87bb749.tar.bz2