diff options
author | Wolfgang Rohdewald <wolfgang@rohdewald.de> | 2009-01-11 18:49:32 +0100 |
---|---|---|
committer | Wolfgang Rohdewald <wolfgang@rohdewald.de> | 2009-01-11 18:49:32 +0100 |
commit | 45ccb381fdc28e4a33f8494d6352b64963d30c7b (patch) | |
tree | 994efa4ac06959a08d0b7c07f3556af9980f5a14 | |
parent | 343e1dfc3d31e49f74c3d006e3bc4a882c1aa7a7 (diff) | |
download | vdr-plugin-muggle-45ccb381fdc28e4a33f8494d6352b64963d30c7b.tar.gz vdr-plugin-muggle-45ccb381fdc28e4a33f8494d6352b64963d30c7b.tar.bz2 |
Rewrote the shell script muggle_getlyrics in Python; it now
talks directly to the Googlyrics2 Python code.
-rw-r--r-- | HISTORY | 2 | ||||
-rw-r--r-- | README | 4 | ||||
-rwxr-xr-x | scripts/mgLyric.py | 52 | ||||
-rwxr-xr-x | scripts/muggle_getlyrics | 198 |
4 files changed, 131 insertions, 125 deletions
@@ -385,3 +385,5 @@ Balke. - Lyrics now work with Googlyrics2 - if several versions of lyrics for a song are found, it is now possible to choose among them. +- Lyrics: rewrite the shell script muggle_getlyrics in python and talk + directly to the Googlyrics2 python code @@ -57,7 +57,9 @@ required: http://www.xiph.org/ogg/vorbis/) - optionally libFLAC++ to replay FLAC files (Debian package libflac++-dev or sources from flac.sourceforge.net) - - recode, python and Googlyrics2 if you want to download lyrics + - recode, python, python-chardet and Googlyrics2 if you want to download lyrics. + Googlyrics2 must be newer than Beta3, otherwise umlauts will be + wrong The developer versions are needed because their headers are required for compilation. The server need not be on the same machine as the diff --git a/scripts/mgLyric.py b/scripts/mgLyric.py deleted file mode 100755 index 97042f5..0000000 --- a/scripts/mgLyric.py +++ /dev/null @@ -1,52 +0,0 @@ -#!/usr/bin/env python -import os, sys, locale - -title = sys.argv[1] -artist = sys.argv[2] -outdir = sys.argv[3] - -# add other possible paths here: -googlFound = False -for scriptdir in ('/usr/share/apps/amarok/scripts/Googlyrics2', \ - 'NULL'): - if os.path.isdir(scriptdir+'/sites/'): - sys.path.append(scriptdir + "/lib/") - sys.path.append(scriptdir + "/sites/") - os.chdir(scriptdir) - googlFound = True - break - -if not googlFound: - outfile = open(outdir + '/1.raw',"w") - outfile.write("Googlyrics2 is not installed\nSee http://quicode.com/googlyircs2") - outfile.close - sys.exit(0) - -Debugging = False - -if Debugging: - outlyric=["Version 1","Version 2","Version 3"] - for idx,item in enumerate(outlyric): - outfile = open(outdir + '/' + str(idx) + '.raw',"w") - outfile.write(item) - outfile.close - sys.exit(0) - -from Googlyrics import * -g = Googlyrics() - -outlyric = g.find_lyrics(title, artist) -if len(outlyric) > 0: - for idx,item in enumerate(outlyric): - l = item.getLyric() - if l is not None: - if l.lyrics is 
not None: - if len(l.lyrics)>2: -# if we pipe or write output to a file, python by default recodes into ascii, -# and sys.stdout.encoding is also set to ascii. But if the system -# default locale is for example utf-8, we also want the file to be -# encoded like that - outfile = open(outdir + '/' + str(idx) + '.raw',"w") - outfile.write(l.lyrics.encode(locale.getdefaultlocale()[1])) - outfile.close - diff --git a/scripts/muggle_getlyrics b/scripts/muggle_getlyrics index db7fbdf..f0d5701 100755 --- a/scripts/muggle_getlyrics +++ b/scripts/muggle_getlyrics @@ -1,72 +1,126 @@ -#!/bin/sh - -# usage: -# artist title outfile -# -# we might find several different lyrics for a song. This is handled -# by this script: We use a subdirectory outfile.new. If it exists -# and contains a file, we just return that file and remove it from -# outfile.new. If outfile.new is empty, we read all versions again -# with googlyrics. - -# ultimately I want to integrate this shell script into mgLyrics.py - -artist="$1" -title="$2" -outfile="$3" -orgoutfile=`echo "$3" | sed 's/.tmp$//'` -outdir="$3.new" - -echo > "$outfile".loading - -if test ! -d "$outdir" -then - mkdir "$outdir" - MGLYRICS="`dirname $0`/mgLyric.py" - if test ! -x "$MGLYRICS" - then - echo $MGLYRICS not found > "$outdir"/1.raw - else - "$MGLYRICS" "$title" "$artist" "$outdir" - fi - - for i in "$outdir"/*.raw - do - test -r "$i" || break - cat "$i" | - sed 's/\x0d//g' | - sed 's/\xc2\xb4/\x27/g' | - sed 's/\xc3\x82\x27/\x27/g' | - sed 's/\xc3\x82/\x27/g' | - sed 's/\xc3\xb9/\x27/g' | - sed 's/\xe2\x80\x99/\x27/g' | - grep -ive 'NEW.*ringtones' | - grep -v -e '--------------' | - recode HTML..utf8 | - sed 's/\xc2\x91/\x27/g' | # in unicode, those two are reserved for - sed 's/\xc2\x92/\x27/g' | # private use, but still some sites use them... - cat > "$i.converted" 2>/dev/null - rm -f "$i" - echo cmp -s "$i.converted" "$orgoutfile">>/tmp/log.wr - cmp -s "$i.converted" "$orgoutfile" - if test $? 
-eq 0 - then - echo rm -f "$i.converted" >>/tmp/log.wr - rm -f "$i.converted" - continue - fi - done -fi - -rmdir "$outdir" 2>/dev/null -if test -d "$outdir" -then - ls -l "$outdir" - ls "$outdir"/*.converted | head -1 | while read fn - do - test -r "$fn" || break - mv "$fn" "$outfile" - done - rmdir "$outdir" 2>/dev/null -fi -rm -f "$outfile".loading +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +from __future__ import with_statement +import os, sys, locale, re, codecs +import filecmp +from htmlentitydefs import name2codepoint as n2cp + +charset = locale.getdefaultlocale()[1] +title = sys.argv[1].decode(charset).encode('UTF8') +artist = sys.argv[2].decode(charset).encode('UTF8') +outfile = sys.argv[3] + +lyricsfile = os.path.splitext(outfile)[0] +outdir = outfile+'.new' + +# add other possible paths here: +googlFound = False +for scriptdir in ('/usr/share/apps/amarok/scripts/Googlyrics2', \ + 'NULL'): + if os.path.isdir(scriptdir+'/sites/'): + sys.path.append(scriptdir + "/lib/") + sys.path.append(scriptdir + "/sites/") + os.chdir(scriptdir) + googlFound = True + break + +if not googlFound: + writeFile("Googlyrics2 is not installed\nSee http://quicode.com/googlyircs2") + sys.exit(0) + +from Googlyrics import * + +def substitute_entity(match): + ent = match.group(2) + if match.group(1) == "#": + return unichr(int(ent)) + else: + cp = n2cp.get(ent) + + if cp: + return unichr(cp) + else: + return match.group() + +def decode_htmlentities(string): + entity_re = re.compile("&(#?)(\d{1,5}|\w{1,8});") + return entity_re.subn(substitute_entity, string)[0] + +def countFiles(): + if not os.path.isdir(outdir): + return 0 + return len(os.listdir(outdir)) + +def writeFile(name,s): + if not os.path.isdir(outdir): + os.mkdir(outdir) + outfile = open(outdir + '/' + name,"w") + outfile.write(s) + outfile.close + +def load(debug=False): + if debug: + outlyric=["Version 1","Version 2","Version 3"] + for idx,item in enumerate(outlyric): + outfile = open(outdir + '/' + str(idx) + 
'.raw',"w") + outfile.write(item) + outfile.close + sys.exit(0) + + g = Googlyrics() + + outlyric = g.find_lyrics(title, artist) + if len(outlyric) > 0: + print 'versions:',len(outlyric) + for idx,item in enumerate(outlyric): + l = item.getLyric() + if l is None: + continue + if l.lyrics is None: + print 'lyrics is None' + continue + if len(l.lyrics)<3: + print 'lyrics is too short',l.lyrics + continue + s = l.lyrics + s = decode_htmlentities(s) +# s = re.sub(r'&#([0-9]*);',lambda cc: unichr(int(cc.group(1))),s) +# replace lots of apostrophe variants with want we want: + s = s.replace(r'\xc2\xb4',r'\x27') + s = s.replace(r'\xc3\x82\x27',r'\x27') + s = s.replace(r'\xc3\x82',r'\x27') + s = s.replace(r'\xc3\xb9',r'\x27') + s = s.replace(r'\xe2\x80',r'x99/\x27') + s = s.replace(r'\n.*NEW.*ringtones.*\n',r'\n') + s = s.replace(r'\n.*---------.*\n',r'\n') +# in unicode, those two are reserved for +# private use, but still some sites use them.. + s = s.replace(r'\xc2\x91',r'\x27') + s = s.replace(r'\xc2\x92',r'\x27') + s = s.encode(locale.getdefaultlocale()[1]) + writeFile(str(idx)+'-'+l.sitename,s) + +# ----------------------------------------------- +# main : + +semaphore = outfile+'.loading' +with open(semaphore,"w") as f: + f.write('') +try: + if countFiles() == 0: + load() + if countFiles() > 0: + for item in os.listdir(outdir): + itemfile = outdir + '/' + item + if os.path.exists(lyricsfile): + if filecmp.cmp(lyricsfile,itemfile) == 1: + print 'removing duplicate ',itemfile + os.remove(itemfile) + if countFiles() > 0: + os.rename(outdir+'/'+os.listdir(outdir)[0],outfile) + if countFiles() == 0 and os.path.exists(outdir): + os.rmdir(outdir) +finally: + os.remove(semaphore) + |