diff options
author | Wolfgang Rohdewald <wolfgang@rohdewald.de> | 2009-01-11 23:14:12 +0100 |
---|---|---|
committer | Wolfgang Rohdewald <wolfgang@rohdewald.de> | 2009-01-11 23:14:12 +0100 |
commit | c405cdff24427c485e493424f8dbd3a2c3061be8 (patch) | |
tree | 780634b8cf992010e92bc6b5edf96d0cacd45849 /scripts | |
parent | 2975f47469b4f881d01324d4905c6371ac358b4b (diff) | |
download | vdr-plugin-muggle-c405cdff24427c485e493424f8dbd3a2c3061be8.tar.gz vdr-plugin-muggle-c405cdff24427c485e493424f8dbd3a2c3061be8.tar.bz2 |
split lines now works with unicode. Fixes bug #52
Diffstat (limited to 'scripts')
-rwxr-xr-x | scripts/muggle_getlyrics | 30 |
1 files changed, 22 insertions, 8 deletions
diff --git a/scripts/muggle_getlyrics b/scripts/muggle_getlyrics
index f0d5701..b187487 100755
--- a/scripts/muggle_getlyrics
+++ b/scripts/muggle_getlyrics
@@ -6,6 +6,28 @@ import os, sys, locale, re, codecs
 import filecmp
 from htmlentitydefs import name2codepoint as n2cp
 
+MAXLINELENGTH = 50
+
+def writeFile(name,s):
+    if not os.path.isdir(outdir):
+        os.mkdir(outdir)
+    outname = outdir + '/' + name
+    outfile = open(outname,"w")
+    lines = s.splitlines()
+    for line in lines:
+# xa0 is unicode non breaking space
+        words = re.split(r' |\xa0',line)
+        widx1 = 0
+        while widx1 < len(words):
+            widx2 = len(words)
+            while widx2 > widx1 + 1 and len(' '.join(words[widx1:widx2])) > MAXLINELENGTH:
+                widx2 -= 1
+            newline = ' '.join(words[widx1:widx2])+'\n'
+            newline = newline.encode(locale.getdefaultlocale()[1])
+            outfile.write(newline)
+            widx1 = widx2
+    outfile.close
+
 charset = locale.getdefaultlocale()[1]
 title = sys.argv[1].decode(charset).encode('UTF8')
 artist = sys.argv[2].decode(charset).encode('UTF8')
@@ -52,13 +74,6 @@ def countFiles():
         return 0
     return len(os.listdir(outdir))
 
-def writeFile(name,s):
-    if not os.path.isdir(outdir):
-        os.mkdir(outdir)
-    outfile = open(outdir + '/' + name,"w")
-    outfile.write(s)
-    outfile.close
-
 def load(debug=False):
     if debug:
         outlyric=["Version 1","Version 2","Version 3"]
@@ -98,7 +113,6 @@ def load(debug=False):
 # private use, but still some sites use them..
             s = s.replace(r'\xc2\x91',r'\x27')
             s = s.replace(r'\xc2\x92',r'\x27')
-            s = s.encode(locale.getdefaultlocale()[1])
             writeFile(str(idx)+'-'+l.sitename,s)
 
 # -----------------------------------------------