diff options
author | woro <woro@e10066b5-e1e2-0310-b819-94efdf66514b> | 2008-07-07 11:50:27 +0000 |
---|---|---|
committer | woro <woro@e10066b5-e1e2-0310-b819-94efdf66514b> | 2008-07-07 11:50:27 +0000 |
commit | 91025428f2b78381d0f876818845402ac01e48a8 (patch) | |
tree | 69c165cb4c4af424067b12c6be48d1c2dc95fce5 | |
parent | 7270e75552318e948eca64b591a21ef8a70430ec (diff) | |
download | vdr-plugin-muggle-91025428f2b78381d0f876818845402ac01e48a8.tar.gz vdr-plugin-muggle-91025428f2b78381d0f876818845402ac01e48a8.tar.bz2 |
muggle_getlyrics now works with the unmodified googlyrics
git-svn-id: https://vdr-muggle.svn.sourceforge.net/svnroot/vdr-muggle/trunk/muggle-plugin@1205 e10066b5-e1e2-0310-b819-94efdf66514b
-rwxr-xr-x | scripts/googlyrics | 409 | ||||
-rw-r--r-- | scripts/googlyrics.diff | 49 | ||||
-rwxr-xr-x | scripts/muggle_getlyrics | 42 |
3 files changed, 37 insertions, 463 deletions
diff --git a/scripts/googlyrics b/scripts/googlyrics deleted file mode 100755 index 62d6532..0000000 --- a/scripts/googlyrics +++ /dev/null @@ -1,409 +0,0 @@ -#!/usr/bin/perl -use strict; # Disabled for release version -use warnings; -use WWW::Mechanize; -use IO::File; -use HTML::Entities; -use Text::Iconv; - -#Necessary globals -our $mech = WWW::Mechanize->new(); -$mech->agent_alias( 'Linux Mozilla' ); - -#Sites used for URL matching - -my %metro = ( - site => "metrolyrics.com", - name => "Metrolyrics", - regex => qr/Ringtone \*\*\*<\/a>(.*?)<img/msi, - disabled => 0, - plain => 0, -); - -my %freel = ( - site => "free-lyrics.net", - name => "Free-Lyrics", - regex => qr/<td class="style5" style="font-weight:normal;padding-left:5px;">(.*?)<\/td>/msi, - disabled => 0, - plain => 0, -); - -my %hotly = ( - site => "hotlyrics.net", - name => "Hot Lyrics", - regex => qr/<!-- GOOGLE END \/\/-->(.*?)<script type="text\/javascript">/msi, - disabled => 0, - plain => 0, -); -my %leos = ( - site => "leoslyrics.com", - name => "Leo's Lyrics", - regex => qr/<font face="Trebuchet MS, Verdana, Arial" size=-1>(.*?)<\/font>/msi, - disabled => 0, - plain => 0, -); - -my %mma = ( - site => "themadmusicarchive.com", - name => "The Mad Music Archive", - regex => qr/<td><span class="Verdana8">(.*?)<\/span>/msi, - disabled => 0, - plain => 0, -); - -my %lyricspy = ( - site => "lyricspy.com", - name => "Lyricspy", - regex => qr/<\/b><br \/>(.*?)<div>/msi, - disabled => 0, - plain => 0, -); - -my %lyricwiki = ( - site => "lyricwiki.org", - name => "Lyricwiki", - regex => qr/<div id="lyric">(.*?)<\/div/msi, - disabled => 0, - plain => 0, -); - -my %lyriki = ( - site => "lyriki.org", - name => "Lyriki", - regex => qr/<\/div>\n<p>(.*?)<\/p>/msi, - disabled => 0, - plain => 0, -); - -my %lyricsmania = ( - site => "lyricsmania.com", - name => "Lyricsmania", - regex => qr/Title: <b>.*?<br><br>(.*?)<script/msi, - disabled => 0, - plain => 0, -); - -my %letssingit = ( - site => "letssingit.com", - name => "Let's Sing It", - regex => qr/<TR class=row2><TD><PRE>(.*)<\/PRE><SPAN class=credits>/msi, - disabled => 0, - plain => 1, -); - -my %sing365 = ( - site => "sing365.com", - name => "Sing365", - #regex => qr/Print the Lyrics(.*?)<hr size=1 color=#cccccc>/msi, - regex => qr|Ringtones</u> <<(.*?)<TABLE cellSpacing="0"|msi, - disabled => 0, - plain => 0, -); - -my %azlyrics = ( - site => "azlyrics.com", - name => "AZLyrics", - regex => qr/<FONT size=2>.*?<BR>\s*(.*?)\[ <a href="http:\/\/www.azlyrics.com">www.azlyrics.com<\/a> \]<BR><BR>/msi, - disabled => 0, - plain => 0, -); - -my %l007 = ( - site => "lyrics007.com", - name => "Lyrics007", - #regex => qr/src=\"http:\/\/pagead2\.googlesyndication\.com\/pagead\/show_ads\.js\">\n<\/script>\n<br><br>(.*?)The hottest songs from/msi, - regex => qr|Ringtone <<(.*?)<a|msi, - disabled => 0, - plain => 0, -); - -my %actionext = ( - site => "actionext.com", - name => "Actionext", - regex => qr/<h3>performed by .*?<\/h3>(.*)<div class="foundat">/msi, - disabled => 0, - plain => 0, -); - -my %songmeanings = ( - site => "songmeanings.net", - name => "Song Meanings", - regex => qr/<td width="100%" style="text-align:left;">.*<td width="100%" style="text-align:left;">\s*(.*?)\s*<\/td>/msi, - disabled => 0, - plain => 0, -); - -my %wearethelyrics = ( - site => "wearethelyrics.com", - name => "We Are The Lyrics", - regex => qr/<\/h3>\n<p>\s*(.*?)\s*<\/p>/msi, - disabled => 0, - plain => 0, -); - -my %mp3bg = ( - site => "mp3-bg.com", - name => "mp3-bg.com", - regex => qr/<\/h2><p>(.*?)<ul class="admin">/msi, - disabled => 0, - plain => 0, -); - -my %mldb = ( - site => "mldb.org", - name => "MLDb", - regex => qr/<p class=songtext>(.*?)<\/table>/msi, - disabled => 0, - plain => 0, -); - -my %justsomelyrics = ( - site => "justsomelyrics.com", - name => "JUST SOME LYRICS", - regex => qr/<\/h1>(.*?)<a/msi, - disabled => 0, - plain => 0, -); - -my %mylyricsbox = ( - site => "mylyricsbox.com", - name => "MyLyricsBox", - regex => qr/<div class="songLyrics">(.*?)<\/div>/msi, - disabled => 0, - plain => 0, -); - -my %megalyrics = ( - site => "megalyrics.ru", - name => "MegaLyrics", - regex => qr/<\/script>[[:cntrl:]]*?<br><br>(.*?)<br><a href=\"javascript/msi, - disabled => 0, - plain => 0, -); - -my %lyricsee = ( - site => "lyrics.ee", - name => "Lyrics.ee", - regex => qr|</td></tr> -->*?<br>\n(.*?)<p><br>|msi, - disabled => 0, - plain => 0, -); - -my %lyricseeprint = ( - site => "lyrics.ee", - name => "Lyrics.ee (print page)", - regex => qr|<td height="20"></td>(.*?)</td>|msi, - disabled => 0, - plain => 0, -); - -my %kovach = ( - site => "kovach.co.yu", - name => "Kovach", - regex => qr#>Z</a>.*?<td width="100%" valign="top">(.*?)</td></tr></table>#msi, - disabled => 0, - plain => 0, -); - -my %letras = ( - site => "letras.terra.com.br", - name => "letras.terra.com.br", - regex => qr|<p id='cmp'>.*?</p>(.*?)</p><br/>|msi, - disabled => 0, - plain => 0, -); - -my %lyricstime = ( - site => "lyricstime.com", - name => "Lyrics Time", - regex => qr|.*END ADREACTOR ADVANCED CODE BLOCK -->(.*?)<!-- main content end|msi, - disabled => 0, - plain => 0, -); - -my %lyricsspot = ( - site => "lyricsspot.com", - name => "Lyricsspot", - regex => qr/<\/h3><font size="2">(.*?)<\/p><\/font>/msi, - disabled => 0, - plain => 0, -); - -my %local = ( - site => "~/lyrics", - name => "Local lyrics/cache", - disabled => 0, - plain => 1, -); - -#put references to all the lyrics sites into the hash - -my @sites = (\%metro,\%freel,\%hotly,\%leos, \%mma, \%lyricspy, \%lyricwiki, \%lyriki, \%letssingit, \%sing365, \%azlyrics, \%l007, \%actionext, \%songmeanings, \%wearethelyrics, \%mp3bg, \%mldb, \%justsomelyrics, \%mylyricsbox, \%megalyrics, \%lyricsmania, \%lyricsee, \%lyricseeprint, \%kovach, \%letras, \%lyricstime, \%lyricsspot); - - -sub querylyrics { - my $artist = urldecode(shift); - my $title = urldecode(shift); - - # This is for local file lyrics - my $fh = new IO::File; - - my $file = $title . ".txt"; - my $file2 = $artist . " - " . $title . ".txt"; - if (open(FH, "< " . $ENV{"HOME"} . "/lyrics/$file") || open(FH,"< " . $ENV{"HOME"} . "/lyrics/$file2")) { - my $text = ""; - while ($_ = <FH>) { - $text .= $_ - } - $fh->close; - showlyrics($text, \%local, "http://localhost", $artist, $title); - return 1; - } - - $artist =~ s/^The //sgi; #Remove the starting word "The" from artist name, it just causes problems - $title =~ s/\(.*?\)//sgi; - $title =~ s/\[.*?\]//sgi; - if ($artist eq "") { - $title =~ /(.*) - (.*)/; # try to extract song + artist information. - if ($1 ne '' && $2 ne '') { - $artist = $1; - $title = $2; - } - } - my $attempt = 1; - while ($attempt != 5) { -# print "\n<br>Attempt #" . $attempt . "\n"; - $mech->get("http://www.google.com/intl/en/"); - if (!$mech->success()) { - return "connectfail"; - } - # Try several search queries. - if ($attempt == 1) { - $mech->field("q", "lyrics intitle:\"$artist - $title\"", ); - } elsif ($attempt == 2) { - $mech->field("q", "lyrics \"$artist\" intitle:\"$title\"", ); - } elsif ($attempt == 3) { - $mech->field("q", "lyrics \"$artist\" \"$title\"", ); - } elsif ($attempt == 4) { - $mech->field("q", "lyrics $artist $title", ); - } - $mech->submit(); - foreach ($mech->content() =~ m/<div class=g[\s>].*?<a href=\"(.*?)\"/img) { - my $url = $_; -# print "\n<br>" . $url . "\n"; - my $o; - my $ly; - foreach $ly (@sites) { - my $urlregex = $ly->{site}; - if ($url =~ m/$urlregex/si) { - if ($o = scrape($url, $ly, $artist, $title)) { - return $o; - } else { - next; - } - } - } - } - $attempt = $attempt + 1; - } - return "Fail"; -} - -sub scrape { - my $loc = shift; - my $site = shift; - my $artist = shift; - my $title = shift; - if ($site->{disabled}) { - return 0; - } - $mech->get($loc); - if (!$mech->success()) { - return 0; #Assume the user _does_ have an internet connection since a previous test has happened on google, let's just say the lyrics site is down. - } - my @cont_type = $mech->response()->content_type; - $cont_type[1]=~ s/charset=(.*)/$1/ig; # Get the charset of the response - my $char_converter = Text::Iconv->new($cont_type[1], "UTF-8"); # Convert the response to UTF-8 - my $current = $mech->content(); - my $regex = $site->{regex}; - if ($current =~ $regex) { -# print "\n<br>Regex success for " . $site->{name} . "\n"; - showlyrics($char_converter->convert($1), $site, $loc, $artist, $title); - return 1; - } else { -# print "\n<br>Regex failed for " . $site->{name} . "\n"; - return 0; - } -} - -#while (1) { - my $message = <STDIN>; - chomp($message); - if ($message =~ /^configure/) { - system("dcop", "amarok", "playlist", "popupMessage", "This script does not require any configuration."); - } elsif ($message =~ /^fetchLyrics/) { - my @tofetch = split(/ /, $message); - my $artist = urldecode($tofetch[1]); - my $title = urldecode($tofetch[2]); - my $out = querylyrics($artist, $title); - if ($out eq "Fail") { - system("dcop", "amarok", "contextbrowser", "showLyrics", "<?xml version=\"1.0\" encoding=\"UTF-8\" ?> <suggestions page_url=\"http://www.google.org\">Failed to find any lyrics. Press refresh to try again.</suggestions>"); - } elsif ($out eq "connectfail") { - system("dcop", "amarok", "contextbrowser", "showLyrics", ""); #communications errror, "send an empty string" - } - } -#} - -sub showlyrics { - my $out = shift; - my $site = shift; - my $loc = shift; - my $artist = shift; - my $title = shift; - if ($site->{plain}) { - $out = striphtml($out); - } else { - $out = striphtml(htmllinebreak($out)); - } - $out =~ s/^\s+|\s+$//g; #Kills leading and trailing whitespace. - $out =~ s/\[.*? lyrics on http:\/\/www\.metrolyrics\.com\]\n//g; #metrolyrics: we're sick of your bullshit. - print $out . "\n"; - my $doc = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?> <lyrics site=\"" . encode_entities($site->{name}) ."\" site_url=\"" . encode_entities($site->{site}) . "\" page_url=\"" . encode_entities($loc) . "\" artist=\"" . filter($artist) . "\" title=\"" . filter($title) . "\">" . filter($out) . "</lyrics>"; - my $fh = new IO::File; - my $file = $artist . " - " . $title . " - Saved.txt"; - if ($fh->open("> " . $ENV{"HOME"} . "/lyrics/$file")) { - print $fh $out - } - system("dcop", "amarok", "contextbrowser", "showLyrics", $doc); -} -sub htmllinebreak { - my $out = shift; - $out =~ s/\n//sgi; #Kill normal linebreaks, we're going HTML :) - $out =~ s/<br>/\n/sgi; - $out =~ s/<br *\/?>/\n/sgi; - return $out; -} - -sub filter { - my $text = shift; - $text =~ s/&/&/go; - $text =~ s/</</go; - $text =~ s/>/>/go; - $text =~ s/'/'/go; - $text =~ s/`/'/go; - $text =~ s/’/'/go; - $text =~ s/"/"/go; - return $text; -} - -sub urldecode { - my $str = shift; - $str =~ s/%([A-Fa-f0-9]{2})/pack('C', hex($1))/seg; - return $str; -} - -sub striphtml { - my $str = shift; - $str =~ s/\<[^\<]+\>//g; - return $str; -} diff --git a/scripts/googlyrics.diff b/scripts/googlyrics.diff deleted file mode 100644 index 4e01f62..0000000 --- a/scripts/googlyrics.diff +++ /dev/null @@ -1,49 +0,0 @@ ---- /usr/share/apps/amarok/scripts/googlyrics/googlyrics 2008-02-13 23:52:25.000000000 +0100 -+++ googlyrics 2008-03-27 13:18:20.000000000 +0100 -@@ -264,7 +264,7 @@ - } - my $attempt = 1; - while ($attempt != 5) { -- print "\n<br>Attempt #" . $attempt . "\n"; -+# print "\n<br>Attempt #" . $attempt . "\n"; - $mech->get("http://www.google.com/intl/en/"); - if (!$mech->success()) { - return "connectfail"; -@@ -282,7 +282,7 @@ - $mech->submit(); - foreach ($mech->content() =~ m/<div class=g[\s>].*?<a href=\"(.*?)\"/img) { - my $url = $_; -- print "\n<br>" . $url . "\n"; -+# print "\n<br>" . $url . "\n"; - my $o; - my $ly; - foreach $ly (@sites) { -@@ -319,16 +319,16 @@ - my $current = $mech->content(); - my $regex = $site->{regex}; - if ($current =~ $regex) { -- print "\n<br>Regex success for " . $site->{name} . "\n"; -+# print "\n<br>Regex success for " . $site->{name} . "\n"; - showlyrics($char_converter->convert($1), $site, $loc, $artist, $title); - return 1; - } else { -- print "\n<br>Regex failed for " . $site->{name} . "\n"; -+# print "\n<br>Regex failed for " . $site->{name} . "\n"; - return 0; - } - } - --while (1) { -+#while (1) { - my $message = <STDIN>; - chomp($message); - if ($message =~ /^configure/) { -@@ -344,7 +344,7 @@ - system("dcop", "amarok", "contextbrowser", "showLyrics", ""); #communications errror, "send an empty string" - } - } --} -+#} - - sub showlyrics { - my $out = shift; diff --git a/scripts/muggle_getlyrics b/scripts/muggle_getlyrics index 5ee9a75..190e413 100755 --- a/scripts/muggle_getlyrics +++ b/scripts/muggle_getlyrics @@ -1,8 +1,20 @@ +#!/bin/sh + +export GOOGLYRICS=/usr/share/apps/amarok/scripts/googlyrics/googlyrics + rm -f "$3" txtfound=0 artist=`echo $1 | sed 's/ /%20/'g` title=`echo $2 | sed 's/ /%20/'g` -echo fetchLyrics $artist $title | `dirname $0`/googlyrics 2>/dev/null | + +if test ! -x $GOOGLYRICS +then + echo $GOOGLYRICS nicht gefunden > $3 + exit 2 +fi + +export success=0 +echo fetchLyrics $artist $title | $GOOGLYRICS 2>&1 | sed 's/\x0d//g' | sed 's/\xc2\xb4/\x27/g' | sed 's/\xc3\x82\x27/\x27/g' | @@ -10,18 +22,38 @@ echo fetchLyrics $artist $title | `dirname $0`/googlyrics 2>/dev/null | sed 's/\xc3\xb9/\x27/g' | sed 's/\xe2\x80\x99/\x27/g' | grep -ive 'NEW.*ringtones' | + grep -v '--------------' | recode HTML..utf8 | sed 's/\xc2\x91/\x27/g' | # in unicode, those two are reserved for sed 's/\xc2\x92/\x27/g' | # private use, but still some sites use them... while read line do -# strip starting empty lines +# did we find a text yet? + if expr "$line" : '<br>Regex success' >/dev/null 2>&1 + then + success=1 + continue + fi +# googlyrics tries to send the finished text to amarok: + if expr "$line" : 'object not accessible' >/dev/null 2>&1 + then + killall googlyrics >/dev/null 2>&1 + break + fi +# googlyrics starts main loop again: + if expr "$line" : '.*scalar chomp.*STDIN' >/dev/null 2>&1 + then + killall googlyrics >/dev/null 2>&1 + break + fi + test $success = 0 && continue +# suppress other googlyrics error messages + expr "$line" : '.*'$GOOGLYRICS >/dev/null 2>&1 && continue notempty=0 test x"$line" = x || notempty=1; test $notempty -eq 1 && txtfound=1; - test $txtfound -eq 1 -o $notempty -eq 1 && echo $line -done > "$3".loading - + test $txtfound -eq 1 && echo $line +done > "$3".loading 2>/dev/null # use .loading because the file is already there when googlyrics starts # but muggle thinks we are done as soon as $3 exists |