1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
|
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import with_statement
import os, sys, locale, re, codecs
import filecmp
from htmlentitydefs import name2codepoint as n2cp
# adjust MAXLINELENGTH to your needs
MAXLINELENGTH = 50


def writeFile(name, s, directory=None):
    """Write text *s* to file *name* in the output directory, word-wrapped.

    The directory is created if it does not exist yet. Every input line is
    split into words at spaces and non-breaking spaces and re-joined
    greedily, so that an output line only exceeds MAXLINELENGTH characters
    when a single word is longer than that (words are never split).

    name      -- file name, relative to the output directory
    s         -- the text to write
    directory -- output directory; defaults to the module-level ``outdir``
    """
    if directory is None:
        directory = outdir
    if not os.path.isdir(directory):
        os.mkdir(directory)
    # Look the target encoding up once instead of once per written line.
    # getdefaultlocale() may report no encoding at all (or reject an
    # unknown locale name); fall back to UTF-8 in both cases.
    try:
        encoding = locale.getdefaultlocale()[1]
    except ValueError:
        encoding = None
    encoding = encoding or 'UTF-8'
    # The lines are encoded by hand below, so the handle is opened in binary
    # mode and receives exactly those bytes.
    out = open(directory + '/' + name, "wb")
    try:
        for line in s.splitlines():
            # \xa0 is the unicode non-breaking space
            words = re.split(r' |\xa0', line)
            start = 0
            while start < len(words):
                end = len(words)
                # Drop trailing words until the chunk fits, but always keep
                # at least one word so the outer loop makes progress.
                while (end > start + 1
                       and len(' '.join(words[start:end])) > MAXLINELENGTH):
                    end -= 1
                wrapped = ' '.join(words[start:end]).rstrip() + '\n'
                out.write(wrapped.encode(encoding))
                start = end
    finally:
        # The original referenced ``outfile.close`` without calling it.
        out.close()
# Command line: TITLE ARTIST OUTFILE
if len(sys.argv) < 4:
    sys.exit('usage: %s TITLE ARTIST OUTFILE' % sys.argv[0])
# getdefaultlocale() may report no encoding (None); decoding with None would
# raise a TypeError, so fall back to UTF-8 in that case.
charset = locale.getdefaultlocale()[1] or 'UTF-8'
# title and artist are converted from the locale charset to UTF-8 byte strings
title = sys.argv[1].decode(charset).encode('UTF8')
artist = sys.argv[2].decode(charset).encode('UTF8')
outfile = sys.argv[3]
# outfile without its extension; the main section compares candidates to it
lyricsfile = os.path.splitext(outfile)[0]
# directory that collects the candidate lyrics files
outdir = outfile + '.new'
# Locate the Googlyrics2 installation and make its modules importable.
# add other possible paths here:
googlFound = False
for scriptdir in ('/usr/share/apps/amarok/scripts/Googlyrics2',
                  'NULL'):
    if os.path.isdir(scriptdir + '/sites/'):
        sys.path.append(scriptdir + "/lib/")
        sys.path.append(scriptdir + "/sites/")
        # NOTE(review): after this chdir a relative OUTFILE argument resolves
        # against scriptdir -- confirm callers always pass absolute paths.
        os.chdir(scriptdir)
        googlFound = True
        break
if not googlFound:
    # writeFile() takes a file name and the text; the original call passed
    # only the text and therefore died with a TypeError instead of leaving
    # the message behind.
    writeFile('error', "Googlyrics2 is not installed\nSee http://quicode.com/googlyircs2")
    sys.exit(0)
from Googlyrics import *
def substitute_entity(match):
    """Return the character for one matched HTML character reference.

    *match* is a match object whose group 1 is ``"#"`` for numeric
    references (empty otherwise) and whose group 2 is the reference body.
    Decimal (``&#39;``) and hexadecimal (``&#x27;``) numeric references and
    named references known to ``n2cp`` are converted. Anything that cannot
    be converted (unknown name, malformed number, code point out of range)
    is returned unchanged.
    """
    original = match.group()
    ent = match.group(2)
    if match.group(1) == "#":
        try:
            if ent[:1] in ('x', 'X'):
                # hexadecimal form: &#xHH;
                return unichr(int(ent[1:], 16))
            return unichr(int(ent))
        except (ValueError, OverflowError):
            # not a number after all, or not a valid code point
            return original
    cp = n2cp.get(ent)
    if cp:
        return unichr(cp)
    return original
def decode_htmlentities(string):
    """Return *string* with its HTML character references substituted.

    Every ``&name;`` / ``&#digits;`` style occurrence is passed to
    substitute_entity() and replaced by whatever that function returns.
    """
    pattern = r"&(#?)(\d{1,5}|\w{1,8});"
    return re.sub(pattern, substitute_entity, string)
def countFiles():
    """Return the number of entries in ``outdir`` (0 if it is not a directory)."""
    if os.path.isdir(outdir):
        return len(os.listdir(outdir))
    return 0
def load():
    """Fetch lyrics for ``title`` / ``artist`` and write one file per result.

    Every result returned by Googlyrics.find_lyrics() that carries lyrics of
    at least three characters is entity-decoded, cleaned up and written with
    writeFile() under the name ``<index>-<sitename>``.
    """
    g = Googlyrics()
    outlyric = g.find_lyrics(title, artist)
    for idx, item in enumerate(outlyric):
        l = item.getLyric()
        # skip results without usable lyrics text
        if l is None or l.lyrics is None or len(l.lyrics) < 3:
            continue
        s = decode_htmlentities(l.lyrics)
        # replace lots of apostrophe variants with what we want:
        # NOTE(review): the patterns below are raw strings, so they match the
        # literal backslash text (e.g. the eight characters \xc2\xb4), not
        # the bytes they spell, and insert a literal \x27. The '\xe2\x80'
        # pair also looks garbled. They are kept as found because un-rawing
        # them safely depends on whether s is str or unicode -- confirm the
        # intended behavior before changing.
        s = s.replace(r'\xc2\xb4', r'\x27')
        s = s.replace(r'\xc3\x82\x27', r'\x27')
        s = s.replace(r'\xc3\x82', r'\x27')
        s = s.replace(r'\xc3\xb9', r'\x27')
        s = s.replace(r'\xe2\x80', r'x99/\x27')
        # Drop advertising and separator lines. These are regular
        # expressions; str.replace() only matches literal text, so the
        # original calls never removed anything.
        s = re.sub(r'\n.*NEW.*ringtones.*\n', '\n', s)
        s = re.sub(r'\n.*---------.*\n', '\n', s)
        # in unicode, those two are reserved for
        # private use, but still some sites use them..
        s = s.replace(r'\xc2\x91', r'\x27')
        s = s.replace(r'\xc2\x92', r'\x27')
        writeFile(str(idx) + '-' + l.sitename, s)
# -----------------------------------------------
# main :
# An empty "<outfile>.loading" marker file exists for as long as the code
# below runs; it is removed again even if something fails.
semaphore = outfile + '.loading'
open(semaphore, "w").close()
try:
    # only query the lyrics sites when no candidates are left from before
    if not countFiles():
        load()
    if countFiles() > 0:
        # throw away candidates that are identical to the lyrics file
        for entry in os.listdir(outdir):
            candidate = '/'.join((outdir, entry))
            if os.path.exists(lyricsfile) and filecmp.cmp(lyricsfile, candidate):
                os.remove(candidate)
    if countFiles() > 0:
        # hand the first remaining candidate over as the result
        first = os.listdir(outdir)[0]
        os.rename(outdir + '/' + first, outfile)
    # remove the candidate directory once it is empty
    if os.path.exists(outdir) and countFiles() == 0:
        os.rmdir(outdir)
finally:
    os.remove(semaphore)
|