muggle_getlyrics now works with the unmodified googlyrics

git-svn-id: https://vdr-muggle.svn.sourceforge.net/svnroot/vdr-muggle/trunk/muggle-plugin@1205 e10066b5-e1e2-0310-b819-94efdf66514b
author: woro <woro@e10066b5-e1e2-0310-b819-94efdf66514b> 2008-07-07 11:50:27 +0000
committer: woro <woro@e10066b5-e1e2-0310-b819-94efdf66514b> 2008-07-07 11:50:27 +0000
commit: 91025428f2b78381d0f876818845402ac01e48a8 (patch)
tree: 69c165cb4c4af424067b12c6be48d1c2dc95fce5
parent: 7270e75552318e948eca64b591a21ef8a70430ec (diff)
download: vdr-plugin-muggle-91025428f2b78381d0f876818845402ac01e48a8.tar.gz
vdr-plugin-muggle-91025428f2b78381d0f876818845402ac01e48a8.tar.bz2
3 files changed, 37 insertions, 463 deletions
diff --git a/scripts/googlyrics b/scripts/googlyrics
deleted file mode 100755
index 62d6532..0000000
--- a/scripts/googlyrics
+++ /dev/null
@@ -1,409 +0,0 @@
-#!/usr/bin/perl
-use strict; # Disabled for release version
-use warnings;
-use WWW::Mechanize;
-use IO::File;
-use HTML::Entities;
-use Text::Iconv;
-
-#Necessary globals
-our $mech = WWW::Mechanize->new();
-$mech->agent_alias( 'Linux Mozilla' );
-
-#Sites used for URL matching
-
-my %metro = (
-	site => "metrolyrics.com",
-	name => "Metrolyrics",
-	regex => qr/Ringtone \*\*\*<\/a>(.*?)<img/msi,
-	disabled => 0,
-	plain => 0,
-);
-
-my %freel = (
-	site => "free-lyrics.net",
-	name => "Free-Lyrics",
-	regex => qr/<td class="style5" style="font-weight:normal;padding-left:5px;">(.*?)<\/td>/msi,
-	disabled => 0,
-	plain => 0,
-);
-
-my %hotly = (
-	site => "hotlyrics.net",
-	name => "Hot Lyrics",
-	regex => qr/<!-- GOOGLE END \/\/-->(.*?)<script type="text\/javascript">/msi,
-	disabled => 0,
-	plain => 0,
-);
-my %leos = (
-	site => "leoslyrics.com",
-	name => "Leo's Lyrics",
-	regex => qr/<font face="Trebuchet MS, Verdana, Arial" size=-1>(.*?)<\/font>/msi,
-	disabled => 0,
-	plain => 0,
-);
-
-my %mma = (
-	site => "themadmusicarchive.com",
-	name => "The Mad Music Archive",
-	regex => qr/<td><span class="Verdana8">(.*?)<\/span>/msi,
-	disabled => 0,
-	plain => 0,
-);
-
-my %lyricspy = (
-	site => "lyricspy.com",
-	name => "Lyricspy",
-	regex => qr/<\/b><br \/>(.*?)<div>/msi,
-	disabled => 0,
-	plain => 0,
-);
-
-my %lyricwiki = (
-	site => "lyricwiki.org",
-	name => "Lyricwiki",
-	regex => qr/<div id="lyric">(.*?)<\/div/msi,
-	disabled => 0,
-	plain => 0,
-);
-
-my %lyriki = (
-	site => "lyriki.org",
-	name => "Lyriki",
-	regex => qr/<\/div>\n<p>(.*?)<\/p>/msi,
-	disabled => 0,
-	plain => 0,
-);
-
-my %lyricsmania = (
-	site => "lyricsmania.com",
-	name => "Lyricsmania",
-	regex => qr/Title: <b>.*?<br><br>(.*?)<script/msi,
-	disabled => 0,
-	plain => 0,
-);
-
-my %letssingit = (
-	site => "letssingit.com",
-	name => "Let's Sing It",
-	regex => qr/<TR class=row2><TD><PRE>(.*)<\/PRE><SPAN class=credits>/msi,
-	disabled => 0,
-	plain => 1,
-);
-
-my %sing365 = (
-	site => "sing365.com",
-	name => "Sing365",
-	#regex => qr/Print the Lyrics(.*?)<hr size=1 color=#cccccc>/msi,
-	regex => qr|Ringtones</u> <<(.*?)<TABLE cellSpacing="0"|msi,
-	disabled => 0,
-	plain => 0,
-);
-
-my %azlyrics = (
-	site => "azlyrics.com",
-	name => "AZLyrics",
-	regex => qr/<FONT size=2>.*?<BR>\s*(.*?)\[ <a href="http:\/\/www.azlyrics.com">www.azlyrics.com<\/a> \]<BR><BR>/msi,
-	disabled => 0,
-	plain => 0,
-);
-
-my %l007 = (
-	site => "lyrics007.com",
-	name => "Lyrics007",
-	#regex => qr/src=\"http:\/\/pagead2\.googlesyndication\.com\/pagead\/show_ads\.js\">\n<\/script>\n<br><br>(.*?)The hottest songs from/msi,
-	regex => qr|Ringtone <<(.*?)<a|msi,
-	disabled => 0,
-	plain => 0,
-);
-
-my %actionext = (
-	site => "actionext.com",
-	name => "Actionext",
-	regex => qr/<h3>performed by .*?<\/h3>(.*)<div class="foundat">/msi,
-	disabled => 0,
-	plain => 0,
-);
-
-my %songmeanings = (
-	site => "songmeanings.net",
-	name => "Song Meanings",
-	regex => qr/<td width="100%" style="text-align:left;">.*<td width="100%" style="text-align:left;">\s*(.*?)\s*<\/td>/msi,
-	disabled => 0,
-	plain => 0,
-);
-
-my %wearethelyrics = (
-	site => "wearethelyrics.com",
-	name => "We Are The Lyrics",
-	regex => qr/<\/h3>\n<p>\s*(.*?)\s*<\/p>/msi,
-	disabled => 0,
-	plain => 0,
-);
-
-my %mp3bg = (
-	site => "mp3-bg.com",
-	name => "mp3-bg.com",
-	regex => qr/<\/h2><p>(.*?)<ul class="admin">/msi,
-	disabled => 0,
-	plain => 0,
-);
-
-my %mldb = (
-	site => "mldb.org",
-	name => "MLDb",
-	regex => qr/<p class=songtext>(.*?)<\/table>/msi,
-	disabled => 0,
-	plain => 0,
-);
-
-my %justsomelyrics = (
-	site => "justsomelyrics.com",
-	name => "JUST SOME LYRICS",
-	regex => qr/<\/h1>(.*?)<a/msi,
-	disabled => 0,
-	plain => 0,
-);
-
-my %mylyricsbox = (
-	site => "mylyricsbox.com",
-	name => "MyLyricsBox",
-	regex => qr/<div class="songLyrics">(.*?)<\/div>/msi,
-	disabled => 0,
-	plain => 0,
-);
-
-my %megalyrics = (
-	site => "megalyrics.ru",
-	name => "MegaLyrics",
-	regex => qr/<\/script>[[:cntrl:]]*?<br><br>(.*?)<br><a href=\"javascript/msi,
-	disabled => 0,
-	plain => 0,
-);
-
-my %lyricsee = (
-	site => "lyrics.ee",
-	name => "Lyrics.ee",
-	regex => qr|</td></tr> -->*?<br>\n(.*?)<p><br>|msi,
-	disabled => 0,
-	plain => 0,
-);
-
-my %lyricseeprint = (
-	site => "lyrics.ee",
-	name => "Lyrics.ee (print page)",
-	regex => qr|<td height="20"></td>(.*?)</td>|msi,
-	disabled => 0,
-	plain => 0,
-);
-
-my %kovach = (
-	site => "kovach.co.yu",
-	name => "Kovach",
-	regex => qr#>Z</a>.*?<td width="100%" valign="top">(.*?)</td></tr></table>#msi,
-	disabled => 0,
-	plain => 0,
-);
-
-my %letras = (
-        site => "letras.terra.com.br",
-        name => "letras.terra.com.br",
-        regex => qr|<p id='cmp'>.*?</p>(.*?)</p><br/>|msi,
-        disabled => 0,
-        plain => 0,
-);
-
-my %lyricstime = (
-        site => "lyricstime.com",
-        name => "Lyrics Time",
-        regex => qr|.*END ADREACTOR ADVANCED CODE BLOCK -->(.*?)<!--            main content end|msi,
-        disabled => 0,
-        plain => 0,
-);
-
-my %lyricsspot = (
-        site => "lyricsspot.com",
-        name => "Lyricsspot",
-        regex => qr/<\/h3><font size="2">(.*?)<\/p><\/font>/msi,
-        disabled => 0,
-        plain => 0,
-);
-                                        
-my %local = (
-	site => "~/lyrics",
-	name => "Local lyrics/cache",
-	disabled => 0,
-	plain => 1,
-);
-
-#put references to all the lyrics sites into the hash
-
-my @sites = (\%metro,\%freel,\%hotly,\%leos, \%mma, \%lyricspy, \%lyricwiki, \%lyriki, \%letssingit, \%sing365, \%azlyrics, \%l007, \%actionext, \%songmeanings, \%wearethelyrics, \%mp3bg, \%mldb, \%justsomelyrics, \%mylyricsbox, \%megalyrics, \%lyricsmania, \%lyricsee, \%lyricseeprint, \%kovach, \%letras, \%lyricstime, \%lyricsspot);
-
-
-sub querylyrics {
-	my $artist = urldecode(shift);
-	my $title = urldecode(shift);
-
-	# This is for local file lyrics
-	my $fh = new IO::File;
-	
-	my $file = $title . ".txt";
-	my $file2 = $artist . " - " . $title . ".txt";
-	if (open(FH, "< " . $ENV{"HOME"} . "/lyrics/$file") || open(FH,"< " . $ENV{"HOME"} . "/lyrics/$file2")) {
-        my $text = "";
-        while ($_ = <FH>) {
-        	$text .= $_
-		}
-        $fh->close;
-        showlyrics($text, \%local, "http://localhost", $artist, $title);
-        return 1;
-    }
-
-	$artist =~ s/^The //sgi; #Remove the starting word "The" from artist name, it just causes problems
-	$title =~ s/\(.*?\)//sgi;
-	$title =~ s/\[.*?\]//sgi;
-	if ($artist eq "") {
-		$title =~ /(.*) - (.*)/; # try to extract song + artist information.
-		if ($1 ne '' && $2 ne '') {
-			$artist = $1;
-			$title = $2;
-		}
-	}
-	my $attempt = 1;
-	while ($attempt != 5) {
-#	print "\n<br>Attempt #" . $attempt . "\n";
-	$mech->get("http://www.google.com/intl/en/");
-	if (!$mech->success()) {
-		return "connectfail";
-	}
-	# Try several search queries.
-	if ($attempt == 1) {
-		$mech->field("q", "lyrics intitle:\"$artist - $title\"", );
-	} elsif ($attempt == 2) {
-		$mech->field("q", "lyrics \"$artist\" intitle:\"$title\"", );
-	} elsif ($attempt == 3) {
-		$mech->field("q", "lyrics \"$artist\" \"$title\"", );
-	} elsif ($attempt == 4) {
-		$mech->field("q", "lyrics $artist $title", );
-	}
-	$mech->submit();
-	foreach ($mech->content() =~ m/<div class=g[\s>].*?<a href=\"(.*?)\"/img) {
-		my $url = $_;
-#		print "\n<br>" . $url . "\n";
-		my $o;
-		my $ly;
-		foreach $ly (@sites) {
-			my $urlregex = $ly->{site};
-			if ($url =~ m/$urlregex/si) {
-				if ($o = scrape($url, $ly, $artist, $title)) {
-	    			return $o;
-				} else {
-					next;
-				}
-			}
-		}
-	}
-	$attempt = $attempt + 1;
-	}
-	return "Fail";
-}
-
-sub scrape {
-	my $loc = shift;
-	my $site = shift;
-	my $artist = shift;
-	my $title = shift;
-	if ($site->{disabled}) {
-		return 0;
-	}
-	$mech->get($loc);
-	if (!$mech->success()) {
-		return 0; #Assume the user _does_ have an internet connection since a previous test has happened on google, let's just say the lyrics site is down.
-	}
-	my @cont_type = $mech->response()->content_type;
-	$cont_type[1]=~ s/charset=(.*)/$1/ig; # Get the charset of the response
-	my $char_converter = Text::Iconv->new($cont_type[1], "UTF-8"); # Convert the response to UTF-8
-	my $current = $mech->content();
-	my $regex = $site->{regex};
-	if ($current =~ $regex) {
-#		print "\n<br>Regex success for " . $site->{name} . "\n";
-		showlyrics($char_converter->convert($1), $site, $loc, $artist, $title);
-		return 1;
-	} else {
-#		print "\n<br>Regex failed for " . $site->{name} . "\n";
-		return 0;
-	}
-}
-
-#while (1) {
-	my $message = <STDIN>;
-	chomp($message);
-	if ($message =~ /^configure/) {
-		system("dcop", "amarok", "playlist", "popupMessage", "This script does not require any configuration.");
-	} elsif ($message =~ /^fetchLyrics/) {
-		my @tofetch = split(/ /, $message);
-		my $artist = urldecode($tofetch[1]);
-		my $title = urldecode($tofetch[2]);
-		my $out = querylyrics($artist, $title);
-		if ($out eq "Fail") {
-			system("dcop", "amarok", "contextbrowser", "showLyrics", "<?xml version=\"1.0\" encoding=\"UTF-8\" ?> <suggestions page_url=\"http://www.google.org\">Failed to find any lyrics. Press refresh to try again.</suggestions>");
-		} elsif ($out eq "connectfail") {
-			system("dcop", "amarok", "contextbrowser", "showLyrics", ""); #communications errror, "send an empty string"
-		}
-	}
-#}
-
-sub showlyrics {
-	my $out = shift;
-	my $site = shift;
-	my $loc = shift;
-	my $artist = shift;
-	my $title = shift;
-	if ($site->{plain}) {
-		$out = striphtml($out);
-	} else {
-		$out = striphtml(htmllinebreak($out));
-	}
-	$out =~ s/^\s+|\s+$//g; #Kills leading and trailing whitespace.
-	$out =~ s/\[.*? lyrics on http:\/\/www\.metrolyrics\.com\]\n//g; #metrolyrics: we're sick of your bullshit.
-	print $out . "\n";
-	my $doc = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?> <lyrics site=\"" . encode_entities($site->{name}) ."\" site_url=\"" . encode_entities($site->{site}) . "\" page_url=\"" . encode_entities($loc) . "\" artist=\"" . filter($artist) . "\" title=\"" . filter($title) . "\">" . filter($out) . "</lyrics>";
-	my $fh = new IO::File;
-	my $file = $artist . " - " . $title . " - Saved.txt";
-	if ($fh->open("> " . $ENV{"HOME"} . "/lyrics/$file")) {
-		print $fh $out
-	}
-	system("dcop", "amarok", "contextbrowser", "showLyrics", $doc);
-}
-sub htmllinebreak {
-	my $out = shift;
-	$out =~ s/\n//sgi; #Kill normal linebreaks, we're going HTML :)
-	$out =~ s/<br>/\n/sgi;
-	$out =~ s/<br *\/?>/\n/sgi;
-	return $out;
-}
-
-sub filter {
-	my $text = shift;
-    $text =~ s/&/&amp;/go;
-    $text =~ s/</&lt;/go;
-    $text =~ s/>/&gt;/go;
-    $text =~ s/'/&apos;/go;
-    $text =~ s/`/&apos;/go;
-    $text =~ s/’/&apos;/go;
-    $text =~ s/"/&quot;/go;
-    return $text;
-}
-
-sub urldecode {
-  my $str = shift;
-  $str =~ s/%([A-Fa-f0-9]{2})/pack('C', hex($1))/seg;
-  return $str;
-}
-
-sub striphtml {
-	my $str = shift;
-	$str =~ s/\<[^\<]+\>//g;
-	return $str;
-}
diff --git a/scripts/googlyrics.diff b/scripts/googlyrics.diff
deleted file mode 100644
index 4e01f62..0000000
--- a/scripts/googlyrics.diff
+++ /dev/null
@@ -1,49 +0,0 @@
---- /usr/share/apps/amarok/scripts/googlyrics/googlyrics	2008-02-13 23:52:25.000000000 +0100
-+++ googlyrics	2008-03-27 13:18:20.000000000 +0100
-@@ -264,7 +264,7 @@
- 	}
- 	my $attempt = 1;
- 	while ($attempt != 5) {
--	print "\n<br>Attempt #" . $attempt . "\n";
-+#	print "\n<br>Attempt #" . $attempt . "\n";
- 	$mech->get("http://www.google.com/intl/en/");
- 	if (!$mech->success()) {
- 		return "connectfail";
-@@ -282,7 +282,7 @@
- 	$mech->submit();
- 	foreach ($mech->content() =~ m/<div class=g[\s>].*?<a href=\"(.*?)\"/img) {
- 		my $url = $_;
--		print "\n<br>" . $url . "\n";
-+#		print "\n<br>" . $url . "\n";
- 		my $o;
- 		my $ly;
- 		foreach $ly (@sites) {
-@@ -319,16 +319,16 @@
- 	my $current = $mech->content();
- 	my $regex = $site->{regex};
- 	if ($current =~ $regex) {
--		print "\n<br>Regex success for " . $site->{name} . "\n";
-+#		print "\n<br>Regex success for " . $site->{name} . "\n";
- 		showlyrics($char_converter->convert($1), $site, $loc, $artist, $title);
- 		return 1;
- 	} else {
--		print "\n<br>Regex failed for " . $site->{name} . "\n";
-+#		print "\n<br>Regex failed for " . $site->{name} . "\n";
- 		return 0;
- 	}
- }
- 
--while (1) {
-+#while (1) {
- 	my $message = <STDIN>;
- 	chomp($message);
- 	if ($message =~ /^configure/) {
-@@ -344,7 +344,7 @@
- 			system("dcop", "amarok", "contextbrowser", "showLyrics", ""); #communications errror, "send an empty string"
- 		}
- 	}
--}
-+#}
- 
- sub showlyrics {
- 	my $out = shift;
diff --git a/scripts/muggle_getlyrics b/scripts/muggle_getlyrics
index 5ee9a75..190e413 100755
--- a/scripts/muggle_getlyrics
+++ b/scripts/muggle_getlyrics
@@ -1,8 +1,20 @@
+#!/bin/sh
+
+export GOOGLYRICS=/usr/share/apps/amarok/scripts/googlyrics/googlyrics
+
 rm -f "$3"
 txtfound=0
 artist=`echo $1 | sed 's/ /%20/'g`
 title=`echo $2 | sed 's/ /%20/'g`
-echo fetchLyrics $artist $title | `dirname $0`/googlyrics 2>/dev/null |
+
+if test ! -x $GOOGLYRICS
+then
+	echo $GOOGLYRICS nicht gefunden > $3
+	exit 2
+fi
+
+export success=0
+echo fetchLyrics $artist $title | $GOOGLYRICS 2>&1 |
 	sed 's/\x0d//g' |
 	sed 's/\xc2\xb4/\x27/g' |
 	sed 's/\xc3\x82\x27/\x27/g' |
@@ -10,18 +22,38 @@ echo fetchLyrics $artist $title | `dirname $0`/googlyrics 2>/dev/null |
 	sed 's/\xc3\xb9/\x27/g' |
 	sed 's/\xe2\x80\x99/\x27/g' |
 	grep -ive 'NEW.*ringtones' |
+	grep -v '--------------' |
 	recode HTML..utf8 |
 	sed 's/\xc2\x91/\x27/g' |	# in unicode, those two are reserved for
 	sed 's/\xc2\x92/\x27/g' |   # private use, but still some sites use them...
 while read line
 do
-# strip starting empty lines
+# did we find a text yet?
+	if expr "$line" : '<br>Regex success' >/dev/null  2>&1
+	then
+		success=1
+		continue
+	fi
+# googlyrics tries to send the finished text to amarok:
+	if expr "$line" : 'object not accessible' >/dev/null  2>&1
+	then
+		killall googlyrics >/dev/null 2>&1
+		break
+	fi
+# googlyrics starts main loop again:
+	if expr "$line" : '.*scalar chomp.*STDIN' >/dev/null  2>&1
+	then
+		killall googlyrics >/dev/null 2>&1
+		break
+	fi
+	test $success = 0 && continue
+# suppress other googlyrics error messages
+	expr "$line" : '.*'$GOOGLYRICS >/dev/null 2>&1 && continue
 	notempty=0
 	test x"$line" = x || notempty=1;
 	test $notempty -eq 1 && txtfound=1;
-	test $txtfound -eq 1 -o $notempty -eq 1 && echo $line
-done > "$3".loading
-
+	test $txtfound -eq 1 && echo $line
+done > "$3".loading 2>/dev/null
 # use .loading because the file is already there when googlyrics starts
 # but muggle thinks we are done as soon as $3 exists
author	woro <woro@e10066b5-e1e2-0310-b819-94efdf66514b>	2008-07-07 11:50:27 +0000
committer	woro <woro@e10066b5-e1e2-0310-b819-94efdf66514b>	2008-07-07 11:50:27 +0000
commit	91025428f2b78381d0f876818845402ac01e48a8 (patch)
tree	69c165cb4c4af424067b12c6be48d1c2dc95fce5
parent	7270e75552318e948eca64b591a21ef8a70430ec (diff)
download	vdr-plugin-muggle-91025428f2b78381d0f876818845402ac01e48a8.tar.gz vdr-plugin-muggle-91025428f2b78381d0f876818845402ac01e48a8.tar.bz2