#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Fetch lyrics through Googlyrics2 and store them as wrapped text files.

Usage: <script> TITLE ARTIST OUTFILE

Every usable result returned by Googlyrics2 is written to the directory
OUTFILE + '.new' as '<index>-<sitename>'.  Candidates identical to the
file named like OUTFILE without its extension are discarded, then one
remaining candidate is moved to OUTFILE.  While the script is working, an
empty file OUTFILE + '.loading' exists (presumably so the caller can tell
that a lookup is in progress - confirm against the caller).

The script targets Python 2, like Googlyrics2 itself.  The small import
fallbacks below only keep the pure helpers importable elsewhere.
"""
from __future__ import with_statement

import os
import sys
import locale
import re
import codecs
import filecmp

try:
    from htmlentitydefs import name2codepoint as n2cp
except ImportError:
    # Same table under its Python 3 module name.
    from html.entities import name2codepoint as n2cp

try:
    unichr
except NameError:
    # Python 3: chr() already covers the full Unicode range.
    unichr = chr

# adjust MAXLINELENGTH to your needs
MAXLINELENGTH = 50

# "&name;", "&#123;" or "&#x7b;" - the second group is validated later in
# substitute_entity() because \w also lets non-numeric text follow "#".
_ENTITY_RE = re.compile(r"&(#?)(\d{1,5}|\w{1,8});")


def wrapLine(line, width=MAXLINELENGTH):
    """Yield `line` cut into pieces of at most `width` characters.

    The line is split at spaces and non-breaking spaces (xa0) and the words
    are re-joined with single spaces.  Words are never split, so a single
    word longer than `width` is yielded on its own, unshortened.  Trailing
    whitespace is stripped from every piece.  An empty line yields one
    empty piece.
    """
    # xa0 is unicode non breaking space
    words = re.split(r' |\xa0', line)
    count = len(words)
    start = 0
    while start < count:
        # Greedily take words while the joined length still fits.
        end = start + 1
        length = len(words[start])
        while end < count and length + 1 + len(words[end]) <= width:
            length += 1 + len(words[end])
            end += 1
        yield ' '.join(words[start:end]).rstrip()
        start = end


def writeFile(name, s):
    """Write `s` to `outdir`/`name`, wrapped without splitting words.

    Creates the global `outdir` if needed.  Each wrapped line is encoded
    with the default locale's charset (UTF-8 when the locale does not
    report one); characters that charset cannot represent are replaced
    instead of aborting the whole file.
    """
    if not os.path.isdir(outdir):
        os.mkdir(outdir)
    # getdefaultlocale() may return (None, None), e.g. with an unset locale.
    encoding = locale.getdefaultlocale()[1] or 'UTF-8'
    # 'with' guarantees the close() the original never actually called.
    with open(outdir + '/' + name, "w") as out:
        for line in s.splitlines():
            for piece in wrapLine(line):
                out.write((piece + '\n').encode(encoding, 'replace'))


def substitute_entity(match):
    """Return the character for one entity match of _ENTITY_RE.

    Handles decimal ("&#39;") and hexadecimal ("&#x27;") character
    references and named entities ("&amp;").  Anything that cannot be
    decoded is returned unchanged.
    """
    ent = match.group(2)
    if match.group(1) == "#":
        try:
            if ent[:1] in ('x', 'X'):
                return unichr(int(ent[1:], 16))
            return unichr(int(ent))
        except (ValueError, OverflowError):
            # Not a number, or a code point unichr() cannot produce.
            return match.group()
    cp = n2cp.get(ent)
    if cp:
        return unichr(cp)
    return match.group()


def decode_htmlentities(string):
    """Return `string` with HTML entities replaced by their characters."""
    return _ENTITY_RE.sub(substitute_entity, string)


def cleanLyrics(s):
    """Return lyrics text `s` with known junk removed."""
    # Replace apostrophe variants with a plain one.
    # NOTE(review): the arguments below are raw strings, so each call
    # matches the literal backslash-escaped text (e.g. the eight characters
    # "\xc2\xb4"), not the bytes those escapes denote, and the pattern with
    # 'x99/' looks garbled.  They appear to be meant to map UTF-8 encoded
    # apostrophe look-alikes to "'", but whether lyrics arrive as unicode or
    # as byte strings is not visible here, so they are kept unchanged -
    # confirm the input type before turning them into real replacements.
    s = s.replace(r'\xc2\xb4', r'\x27')
    s = s.replace(r'\xc3\x82\x27', r'\x27')
    s = s.replace(r'\xc3\x82', r'\x27')
    s = s.replace(r'\xc3\xb9', r'\x27')
    s = s.replace(r'\xe2\x80', r'x99/\x27')
    # in unicode, those two are reserved for
    # private use, but still some sites use them..
    s = s.replace(r'\xc2\x91', r'\x27')
    s = s.replace(r'\xc2\x92', r'\x27')
    # Drop advertisement and separator lines.  These are regular
    # expressions, so they need re.sub(); str.replace() never matched them.
    s = re.sub(r'\n.*NEW.*ringtones.*\n', '\n', s)
    s = re.sub(r'\n.*---------.*\n', '\n', s)
    return s


def countFiles():
    """Return the number of entries in `outdir`, 0 if it does not exist."""
    if not os.path.isdir(outdir):
        return 0
    return len(os.listdir(outdir))


def load():
    """Query Googlyrics2 and write every usable result into `outdir`.

    Results without lyrics, or with fewer than 3 characters of lyrics, are
    skipped.  Files are named '<index>-<sitename>'.
    """
    g = Googlyrics()
    outlyric = g.find_lyrics(title, artist)
    if not outlyric:
        return
    for idx, item in enumerate(outlyric):
        l = item.getLyric()
        if l is None or l.lyrics is None or len(l.lyrics) < 3:
            continue
        s = cleanLyrics(decode_htmlentities(l.lyrics))
        writeFile(str(idx) + '-' + l.sitename, s)


# -----------------------------------------------
# main :

if __name__ == "__main__":
    if len(sys.argv) < 4:
        sys.stderr.write("usage: %s TITLE ARTIST OUTFILE\n" % sys.argv[0])
        sys.exit(1)

    charset = locale.getdefaultlocale()[1] or 'UTF-8'
    title = sys.argv[1].decode(charset).encode('UTF8')
    artist = sys.argv[2].decode(charset).encode('UTF8')
    # Made absolute because os.chdir() below would otherwise redirect a
    # relative OUTFILE into the Googlyrics2 directory.
    outfile = os.path.abspath(sys.argv[3])
    lyricsfile = os.path.splitext(outfile)[0]
    outdir = outfile + '.new'

    # add other possible paths here:
    googlFound = False
    for scriptdir in ('/usr/share/apps/amarok/scripts/Googlyrics2',
                      'NULL'):
        if os.path.isdir(scriptdir + '/sites/'):
            sys.path.append(scriptdir + "/lib/")
            sys.path.append(scriptdir + "/sites/")
            os.chdir(scriptdir)
            googlFound = True
            break

    if not googlFound:
        # The original call omitted the file name and raised TypeError.
        # NOTE(review): the name '0-error' was chosen during review.
        writeFile('0-error',
                  "Googlyrics2 is not installed\nSee http://quicode.com/googlyircs2")
        sys.exit(0)

    # Importable only now that sys.path has been extended above.
    from Googlyrics import Googlyrics

    semaphore = outfile + '.loading'
    with open(semaphore, "w") as f:
        f.write('')
    try:
        if countFiles() == 0:
            load()
        if countFiles() > 0:
            # Discard candidates identical to the existing lyrics file.
            if os.path.exists(lyricsfile):
                for item in os.listdir(outdir):
                    itemfile = outdir + '/' + item
                    if filecmp.cmp(lyricsfile, itemfile):
                        os.remove(itemfile)
            if countFiles() > 0:
                # sorted() makes the pick deterministic; os.listdir() order
                # is arbitrary.
                os.rename(outdir + '/' + sorted(os.listdir(outdir))[0],
                          outfile)
        if countFiles() == 0 and os.path.exists(outdir):
            os.rmdir(outdir)
    finally:
        os.remove(semaphore)