diff options
| author | woro <woro@e10066b5-e1e2-0310-b819-94efdf66514b> | 2008-04-11 20:11:00 +0000 |
|---|---|---|
| committer | woro <woro@e10066b5-e1e2-0310-b819-94efdf66514b> | 2008-04-11 20:11:00 +0000 |
| commit | 57d7ee812c8be1bd118471512682cb3393ce595f (patch) | |
| tree | 2e0d792fc74eda1661b439853c004e2ed5c06820 /scripts | |
| parent | b2550195d6b61f9be3470679082b8d6b5159aa83 (diff) | |
| download | vdr-plugin-muggle-57d7ee812c8be1bd118471512682cb3393ce595f.tar.gz vdr-plugin-muggle-57d7ee812c8be1bd118471512682cb3393ce595f.tar.bz2 | |
merge mp3ng branch into trunk
git-svn-id: https://vdr-muggle.svn.sourceforge.net/svnroot/vdr-muggle/trunk/muggle-plugin@1173 e10066b5-e1e2-0310-b819-94efdf66514b
Diffstat (limited to 'scripts')
| -rw-r--r-- | scripts/COPYRIGHT | 4 | ||||
| -rwxr-xr-x | scripts/googlyrics | 409 | ||||
| -rw-r--r-- | scripts/googlyrics.diff | 49 | ||||
| -rwxr-xr-x | scripts/muggle-image-convert | 42 | ||||
| -rwxr-xr-x | scripts/muggle_getlyrics | 35 |
5 files changed, 518 insertions, 21 deletions
diff --git a/scripts/COPYRIGHT b/scripts/COPYRIGHT index b0345b9..69933c4 100644 --- a/scripts/COPYRIGHT +++ b/scripts/COPYRIGHT @@ -1,3 +1,7 @@ +googlyrics is an adapted copy from +http://www.kde-apps.org/content/show.php/GoogLyrics?content=73850 + + The content of languages.txt is generated from the file iso_639.xml which contains this copyright: diff --git a/scripts/googlyrics b/scripts/googlyrics new file mode 100755 index 0000000..62d6532 --- /dev/null +++ b/scripts/googlyrics @@ -0,0 +1,409 @@ +#!/usr/bin/perl +use strict; # Disabled for release version +use warnings; +use WWW::Mechanize; +use IO::File; +use HTML::Entities; +use Text::Iconv; + +#Necessary globals +our $mech = WWW::Mechanize->new(); +$mech->agent_alias( 'Linux Mozilla' ); + +#Sites used for URL matching + +my %metro = ( + site => "metrolyrics.com", + name => "Metrolyrics", + regex => qr/Ringtone \*\*\*<\/a>(.*?)<img/msi, + disabled => 0, + plain => 0, +); + +my %freel = ( + site => "free-lyrics.net", + name => "Free-Lyrics", + regex => qr/<td class="style5" style="font-weight:normal;padding-left:5px;">(.*?)<\/td>/msi, + disabled => 0, + plain => 0, +); + +my %hotly = ( + site => "hotlyrics.net", + name => "Hot Lyrics", + regex => qr/<!-- GOOGLE END \/\/-->(.*?)<script type="text\/javascript">/msi, + disabled => 0, + plain => 0, +); +my %leos = ( + site => "leoslyrics.com", + name => "Leo's Lyrics", + regex => qr/<font face="Trebuchet MS, Verdana, Arial" size=-1>(.*?)<\/font>/msi, + disabled => 0, + plain => 0, +); + +my %mma = ( + site => "themadmusicarchive.com", + name => "The Mad Music Archive", + regex => qr/<td><span class="Verdana8">(.*?)<\/span>/msi, + disabled => 0, + plain => 0, +); + +my %lyricspy = ( + site => "lyricspy.com", + name => "Lyricspy", + regex => qr/<\/b><br \/>(.*?)<div>/msi, + disabled => 0, + plain => 0, +); + +my %lyricwiki = ( + site => "lyricwiki.org", + name => "Lyricwiki", + regex => qr/<div id="lyric">(.*?)<\/div/msi, + disabled => 0, + plain => 0, +); + +my %lyriki = ( + site => "lyriki.org", + name => "Lyriki", + regex => qr/<\/div>\n<p>(.*?)<\/p>/msi, + disabled => 0, + plain => 0, +); + +my %lyricsmania = ( + site => "lyricsmania.com", + name => "Lyricsmania", + regex => qr/Title: <b>.*?<br><br>(.*?)<script/msi, + disabled => 0, + plain => 0, +); + +my %letssingit = ( + site => "letssingit.com", + name => "Let's Sing It", + regex => qr/<TR class=row2><TD><PRE>(.*)<\/PRE><SPAN class=credits>/msi, + disabled => 0, + plain => 1, +); + +my %sing365 = ( + site => "sing365.com", + name => "Sing365", + #regex => qr/Print the Lyrics(.*?)<hr size=1 color=#cccccc>/msi, + regex => qr|Ringtones</u> <<(.*?)<TABLE cellSpacing="0"|msi, + disabled => 0, + plain => 0, +); + +my %azlyrics = ( + site => "azlyrics.com", + name => "AZLyrics", + regex => qr/<FONT size=2>.*?<BR>\s*(.*?)\[ <a href="http:\/\/www.azlyrics.com">www.azlyrics.com<\/a> \]<BR><BR>/msi, + disabled => 0, + plain => 0, +); + +my %l007 = ( + site => "lyrics007.com", + name => "Lyrics007", + #regex => qr/src=\"http:\/\/pagead2\.googlesyndication\.com\/pagead\/show_ads\.js\">\n<\/script>\n<br><br>(.*?)The hottest songs from/msi, + regex => qr|Ringtone <<(.*?)<a|msi, + disabled => 0, + plain => 0, +); + +my %actionext = ( + site => "actionext.com", + name => "Actionext", + regex => qr/<h3>performed by .*?<\/h3>(.*)<div class="foundat">/msi, + disabled => 0, + plain => 0, +); + +my %songmeanings = ( + site => "songmeanings.net", + name => "Song Meanings", + regex => qr/<td width="100%" style="text-align:left;">.*<td width="100%" style="text-align:left;">\s*(.*?)\s*<\/td>/msi, + disabled => 0, + plain => 0, +); + +my %wearethelyrics = ( + site => "wearethelyrics.com", + name => "We Are The Lyrics", + regex => qr/<\/h3>\n<p>\s*(.*?)\s*<\/p>/msi, + disabled => 0, + plain => 0, +); + +my %mp3bg = ( + site => "mp3-bg.com", + name => "mp3-bg.com", + regex => qr/<\/h2><p>(.*?)<ul class="admin">/msi, + disabled => 0, + plain => 0, +); + +my %mldb = ( + site => "mldb.org", + name => "MLDb", + regex => qr/<p class=songtext>(.*?)<\/table>/msi, + disabled => 0, + plain => 0, +); + +my %justsomelyrics = ( + site => "justsomelyrics.com", + name => "JUST SOME LYRICS", + regex => qr/<\/h1>(.*?)<a/msi, + disabled => 0, + plain => 0, +); + +my %mylyricsbox = ( + site => "mylyricsbox.com", + name => "MyLyricsBox", + regex => qr/<div class="songLyrics">(.*?)<\/div>/msi, + disabled => 0, + plain => 0, +); + +my %megalyrics = ( + site => "megalyrics.ru", + name => "MegaLyrics", + regex => qr/<\/script>[[:cntrl:]]*?<br><br>(.*?)<br><a href=\"javascript/msi, + disabled => 0, + plain => 0, +); + +my %lyricsee = ( + site => "lyrics.ee", + name => "Lyrics.ee", + regex => qr|</td></tr> -->*?<br>\n(.*?)<p><br>|msi, + disabled => 0, + plain => 0, +); + +my %lyricseeprint = ( + site => "lyrics.ee", + name => "Lyrics.ee (print page)", + regex => qr|<td height="20"></td>(.*?)</td>|msi, + disabled => 0, + plain => 0, +); + +my %kovach = ( + site => "kovach.co.yu", + name => "Kovach", + regex => qr#>Z</a>.*?<td width="100%" valign="top">(.*?)</td></tr></table>#msi, + disabled => 0, + plain => 0, +); + +my %letras = ( + site => "letras.terra.com.br", + name => "letras.terra.com.br", + regex => qr|<p id='cmp'>.*?</p>(.*?)</p><br/>|msi, + disabled => 0, + plain => 0, +); + +my %lyricstime = ( + site => "lyricstime.com", + name => "Lyrics Time", + regex => qr|.*END ADREACTOR ADVANCED CODE BLOCK -->(.*?)<!-- main content end|msi, + disabled => 0, + plain => 0, +); + +my %lyricsspot = ( + site => "lyricsspot.com", + name => "Lyricsspot", + regex => qr/<\/h3><font size="2">(.*?)<\/p><\/font>/msi, + disabled => 0, + plain => 0, +); + +my %local = ( + site => "~/lyrics", + name => "Local lyrics/cache", + disabled => 0, + plain => 1, +); + +#put references to all the lyrics sites into the hash + +my @sites = (\%metro,\%freel,\%hotly,\%leos, \%mma, \%lyricspy, \%lyricwiki, \%lyriki, \%letssingit, \%sing365, \%azlyrics, \%l007, \%actionext, \%songmeanings, \%wearethelyrics, \%mp3bg, \%mldb, \%justsomelyrics, \%mylyricsbox, \%megalyrics, \%lyricsmania, \%lyricsee, \%lyricseeprint, \%kovach, \%letras, \%lyricstime, \%lyricsspot); + + +sub querylyrics { + my $artist = urldecode(shift); + my $title = urldecode(shift); + + # This is for local file lyrics + my $fh = new IO::File; + + my $file = $title . ".txt"; + my $file2 = $artist . " - " . $title . ".txt"; + if (open(FH, "< " . $ENV{"HOME"} . "/lyrics/$file") || open(FH,"< " . $ENV{"HOME"} . "/lyrics/$file2")) { + my $text = ""; + while ($_ = <FH>) { + $text .= $_ + } + $fh->close; + showlyrics($text, \%local, "http://localhost", $artist, $title); + return 1; + } + + $artist =~ s/^The //sgi; #Remove the starting word "The" from artist name, it just causes problems + $title =~ s/\(.*?\)//sgi; + $title =~ s/\[.*?\]//sgi; + if ($artist eq "") { + $title =~ /(.*) - (.*)/; # try to extract song + artist information. + if ($1 ne '' && $2 ne '') { + $artist = $1; + $title = $2; + } + } + my $attempt = 1; + while ($attempt != 5) { +# print "\n<br>Attempt #" . $attempt . "\n"; + $mech->get("http://www.google.com/intl/en/"); + if (!$mech->success()) { + return "connectfail"; + } + # Try several search queries. + if ($attempt == 1) { + $mech->field("q", "lyrics intitle:\"$artist - $title\"", ); + } elsif ($attempt == 2) { + $mech->field("q", "lyrics \"$artist\" intitle:\"$title\"", ); + } elsif ($attempt == 3) { + $mech->field("q", "lyrics \"$artist\" \"$title\"", ); + } elsif ($attempt == 4) { + $mech->field("q", "lyrics $artist $title", ); + } + $mech->submit(); + foreach ($mech->content() =~ m/<div class=g[\s>].*?<a href=\"(.*?)\"/img) { + my $url = $_; +# print "\n<br>" . $url . "\n"; + my $o; + my $ly; + foreach $ly (@sites) { + my $urlregex = $ly->{site}; + if ($url =~ m/$urlregex/si) { + if ($o = scrape($url, $ly, $artist, $title)) { + return $o; + } else { + next; + } + } + } + } + $attempt = $attempt + 1; + } + return "Fail"; +} + +sub scrape { + my $loc = shift; + my $site = shift; + my $artist = shift; + my $title = shift; + if ($site->{disabled}) { + return 0; + } + $mech->get($loc); + if (!$mech->success()) { + return 0; #Assume the user _does_ have an internet connection since a previous test has happened on google, let's just say the lyrics site is down. + } + my @cont_type = $mech->response()->content_type; + $cont_type[1]=~ s/charset=(.*)/$1/ig; # Get the charset of the response + my $char_converter = Text::Iconv->new($cont_type[1], "UTF-8"); # Convert the response to UTF-8 + my $current = $mech->content(); + my $regex = $site->{regex}; + if ($current =~ $regex) { +# print "\n<br>Regex success for " . $site->{name} . "\n"; + showlyrics($char_converter->convert($1), $site, $loc, $artist, $title); + return 1; + } else { +# print "\n<br>Regex failed for " . $site->{name} . "\n"; + return 0; + } +} + +#while (1) { + my $message = <STDIN>; + chomp($message); + if ($message =~ /^configure/) { + system("dcop", "amarok", "playlist", "popupMessage", "This script does not require any configuration."); + } elsif ($message =~ /^fetchLyrics/) { + my @tofetch = split(/ /, $message); + my $artist = urldecode($tofetch[1]); + my $title = urldecode($tofetch[2]); + my $out = querylyrics($artist, $title); + if ($out eq "Fail") { + system("dcop", "amarok", "contextbrowser", "showLyrics", "<?xml version=\"1.0\" encoding=\"UTF-8\" ?> <suggestions page_url=\"http://www.google.org\">Failed to find any lyrics. Press refresh to try again.</suggestions>"); + } elsif ($out eq "connectfail") { + system("dcop", "amarok", "contextbrowser", "showLyrics", ""); #communications errror, "send an empty string" + } + } +#} + +sub showlyrics { + my $out = shift; + my $site = shift; + my $loc = shift; + my $artist = shift; + my $title = shift; + if ($site->{plain}) { + $out = striphtml($out); + } else { + $out = striphtml(htmllinebreak($out)); + } + $out =~ s/^\s+|\s+$//g; #Kills leading and trailing whitespace. + $out =~ s/\[.*? lyrics on http:\/\/www\.metrolyrics\.com\]\n//g; #metrolyrics: we're sick of your bullshit. + print $out . "\n"; + my $doc = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?> <lyrics site=\"" . encode_entities($site->{name}) ."\" site_url=\"" . encode_entities($site->{site}) . "\" page_url=\"" . encode_entities($loc) . "\" artist=\"" . filter($artist) . "\" title=\"" . filter($title) . "\">" . filter($out) . "</lyrics>"; + my $fh = new IO::File; + my $file = $artist . " - " . $title . " - Saved.txt"; + if ($fh->open("> " . $ENV{"HOME"} . "/lyrics/$file")) { + print $fh $out + } + system("dcop", "amarok", "contextbrowser", "showLyrics", $doc); +} +sub htmllinebreak { + my $out = shift; + $out =~ s/\n//sgi; #Kill normal linebreaks, we're going HTML :) + $out =~ s/<br>/\n/sgi; + $out =~ s/<br *\/?>/\n/sgi; + return $out; +} + +sub filter { + my $text = shift; + $text =~ s/&/&/go; + $text =~ s/</</go; + $text =~ s/>/>/go; + $text =~ s/'/'/go; + $text =~ s/`/'/go; + $text =~ s/’/'/go; + $text =~ s/"/"/go; + return $text; +} + +sub urldecode { + my $str = shift; + $str =~ s/%([A-Fa-f0-9]{2})/pack('C', hex($1))/seg; + return $str; +} + +sub striphtml { + my $str = shift; + $str =~ s/\<[^\<]+\>//g; + return $str; +} diff --git a/scripts/googlyrics.diff b/scripts/googlyrics.diff new file mode 100644 index 0000000..4e01f62 --- /dev/null +++ b/scripts/googlyrics.diff @@ -0,0 +1,49 @@ +--- /usr/share/apps/amarok/scripts/googlyrics/googlyrics 2008-02-13 23:52:25.000000000 +0100 ++++ googlyrics 2008-03-27 13:18:20.000000000 +0100 +@@ -264,7 +264,7 @@ + } + my $attempt = 1; + while ($attempt != 5) { +- print "\n<br>Attempt #" . $attempt . "\n"; ++# print "\n<br>Attempt #" . $attempt . "\n"; + $mech->get("http://www.google.com/intl/en/"); + if (!$mech->success()) { + return "connectfail"; +@@ -282,7 +282,7 @@ + $mech->submit(); + foreach ($mech->content() =~ m/<div class=g[\s>].*?<a href=\"(.*?)\"/img) { + my $url = $_; +- print "\n<br>" . $url . "\n"; ++# print "\n<br>" . $url . "\n"; + my $o; + my $ly; + foreach $ly (@sites) { +@@ -319,16 +319,16 @@ + my $current = $mech->content(); + my $regex = $site->{regex}; + if ($current =~ $regex) { +- print "\n<br>Regex success for " . $site->{name} . "\n"; ++# print "\n<br>Regex success for " . $site->{name} . "\n"; + showlyrics($char_converter->convert($1), $site, $loc, $artist, $title); + return 1; + } else { +- print "\n<br>Regex failed for " . $site->{name} . "\n"; ++# print "\n<br>Regex failed for " . $site->{name} . "\n"; + return 0; + } + } + +-while (1) { ++#while (1) { + my $message = <STDIN>; + chomp($message); + if ($message =~ /^configure/) { +@@ -344,7 +344,7 @@ + system("dcop", "amarok", "contextbrowser", "showLyrics", ""); #communications errror, "send an empty string" + } + } +-} ++#} + + sub showlyrics { + my $out = shift; diff --git a/scripts/muggle-image-convert b/scripts/muggle-image-convert index 34ed8d5..3348a69 100755 --- a/scripts/muggle-image-convert +++ b/scripts/muggle-image-convert @@ -9,17 +9,25 @@ FORMAT=pal # target image width/height (taking into account visible screen area) if [ "$FORMAT" = "ntsc" ]; then - TW=600 - TH=420 + TW=704 + TH=480 else - TW=632 - TH=512 + TW=704 + TH=576 fi TMP=$(mktemp ${TMPDIR:-/tmp}/image_convert.pnm.XXXXXX) || exit 2 IMG=$1 MPG=$2 +left=$3 +top=$4 +width=$5 +height=$6 +right=`expr $TW - $left - $width` +bottom=`expr $TH - $top - $height` + + DIR=$(dirname "$MPG") if [ ! -d "$DIR" ]; then mkdir -p "$DIR" || exit 2 @@ -60,27 +68,19 @@ case "$FILE_TYPE" in ;; esac -# -# extract the image size & compute scale value -# -LANG=C # get the decimal point right $TO_PNM "$IMG" >"$TMP" 2>/dev/null -S=$(pnmfile "$TMP" | awk '{ printf "%d %d ",$4,$6 }') -S=$(echo $S $TW $TH | awk '{ sw=$3/$1; sh=$4/$2; s=(sw<sh)?sw:sh; printf "%.4f\n",(s>1)?1.0:s; }') -# -# now run the conversion -# if [ "$FORMAT" = "ntsc" ]; then - pnmscale $S "$TMP" | \ - pnmpad -black -width 704 -height 480 | \ - ppmntsc | \ - ppmtoy4m -v 0 -n 1 -r -S 420mpeg2 -F 30000:1001 | \ +# untested + pnmscale -width $width -height $height "$TMP" | + pnmpad -black -left $left -top $top -right $right -bottom $bottom | + ppmntsc | + ppmtoy4m -v 0 -n 1 -r -S 420mpeg2 -F 30000:1001 | mpeg2enc -f 7 -T 90 -F 4 -nn -a 2 -v 0 -o "$MPG" else - pnmscale $S "$TMP" | \ - pnmpad -black -width 704 -height 576 | \ - ppmntsc --pal | \ - ppmtoy4m -v 0 -n 1 -r -S 420mpeg2 -F 25:1 | \ + pnmscale -width $width -height $height "$TMP" | + pnmpad -black -left $left -top $top -right $right -bottom $bottom | + ppmntsc --pal | + ppmtoy4m -v 0 -n 1 -r -S 420mpeg2 -F 25:1 | mpeg2enc -f 7 -T 90 -F 3 -np -a 2 -v 0 -o "$MPG" fi diff --git a/scripts/muggle_getlyrics b/scripts/muggle_getlyrics new file mode 100755 index 0000000..80be1cf --- /dev/null +++ b/scripts/muggle_getlyrics @@ -0,0 +1,35 @@ +rm -f "$3" +txtfound=0 +artist=`echo $1 | sed 's/ /%20/'g` +title=`echo $2 | sed 's/ /%20/'g` +echo ich bin $0 mit $1 $2 $3 artist=$artist title=$title>> /tmp/log.wr +echo fetchLyrics $artist $title | `dirname $0`/googlyrics 2>/dev/null | + sed 's/\x0d//g' | + sed 's/\xc2\xb4/\x27/g' | + sed 's/\xc3\x82\x27/\x27/g' | + sed 's/\xc3\x82/\x27/g' | + sed 's/\xc3\xb9/\x27/g' | + sed 's/\xe2\x80\x99/\x27/g' | + grep -ive 'NEW.*ringtones' | + recode HTML..utf8 | + sed 's/\xc2\x91/\x27/g' | # in unicode, those two are reserved for + sed 's/\xc2\x92/\x27/g' | # private use, but still some sites use them... +while read line +do +# strip starting empty lines + notempty=0 + test x"$line" = x || notempty=1; + test $notempty -eq 1 && txtfound=1; + test $txtfound -eq 1 -o $notempty -eq 1 && echo $line +done > "$3".loading + +# use .loading because the file is already there when googlyrics starts +# but muggle thinks we are done as soon as $3 exists + +if test -s "$3".loading +then + mv "$3".loading "$3" +else + rm -f "$3".loading +fi +test -s "$3" # we want the exit code |
