diff options
Diffstat (limited to 'src/libwebvi/webvi/request.py')
-rw-r--r-- | src/libwebvi/webvi/request.py | 617 |
1 files changed, 617 insertions, 0 deletions
diff --git a/src/libwebvi/webvi/request.py b/src/libwebvi/webvi/request.py new file mode 100644 index 0000000..e19eb9c --- /dev/null +++ b/src/libwebvi/webvi/request.py @@ -0,0 +1,617 @@ +# request.py - webvi request class +# +# Copyright (c) 2009, 2010 Antti Ajanki <antti.ajanki@iki.fi> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +import urllib +import libxml2 +import os.path +import cStringIO +import re +import download +import sys +import utils +import json2xml +from constants import WebviRequestType + +DEBUG = False + +DEFAULT_TEMPLATE_PATH = '/usr/local/share/webvi/templates' +template_path = DEFAULT_TEMPLATE_PATH + +def debug(msg): + if DEBUG: + if type(msg) == unicode: + sys.stderr.write(msg.encode('ascii', 'replace')) + else: + sys.stderr.write(msg) + sys.stderr.write('\n') + +def set_template_path(path): + global template_path + + if path is None: + template_path = os.path.realpath(DEFAULT_TEMPLATE_PATH) + else: + template_path = os.path.realpath(path) + + debug("set_template_path " + template_path) + +def parse_reference(reference): + """Parses URLs of the following form: + + wvt:///youtube/video.xsl?srcurl=http%3A%2F%2Fwww.youtube.com%2F¶m=name1,value1¶m=name2,value2 + + reference is assumed to be URL-encoded UTF-8 string. + + Returns (template, srcurl, params, processing_instructions) where + template if the URL path name (the part before ?), srcurl is the + parameter called srcurl, and params is a dictionary of (name, + quoted-value) pairs extracted from param parameters. Parameter + values are quoted so that the xslt parser handles them as string. + processing_instructions is dictionary of options that affect the + further processing of the data. + """ + try: + reference = str(reference) + except UnicodeEncodeError: + return (None, None, None, None) + + if not reference.startswith('wvt:///'): + return (None, None, None, None) + + ref = reference[len('wvt:///'):] + + template = None + srcurl = '' + parameters = {} + substitutions = {} + refsettings = {'HTTP-headers': {}} + + fields = ref.split('?', 1) + template = fields[0] + if len(fields) == 1: + return (template, srcurl, parameters, refsettings) + + for par in fields[1].split('&'): + paramfields = par.split('=', 1) + key = paramfields[0] + + if len(paramfields) == 2: + value = urllib.unquote(paramfields[1]) + else: + value = '' + + if key.lower() == 'srcurl': + srcurl = value + + elif key.lower() == 'param': + fields2 = value.split(',', 1) + pname = fields2[0].lower() + if len(fields2) == 2: + pvalue = "'" + fields2[1] + "'" + else: + pvalue = "''" + parameters[pname] = pvalue + + elif key.lower() == 'subst': + substfields = value.split(',', 1) + if len(substfields) == 2: + substitutions[substfields[0]] = substfields[1] + + elif key.lower() == 'minquality': + try: + refsettings['minquality'] = int(value) + except ValueError: + pass + + elif key.lower() == 'maxquality': + try: + refsettings['maxquality'] = int(value) + except ValueError: + pass + + elif key.lower() == 'postprocess': + refsettings.setdefault('postprocess', []).append(value) + + elif key.lower() == 'contenttype': + refsettings['overridecontenttype'] = value + + elif key.lower() == 'http-header': + try: + headername, headerdata = value.split(',', 1) + except ValueError: + continue + refsettings['HTTP-headers'][headername] = headerdata + + if substitutions: + srcurl = brace_substitution(srcurl, substitutions) + + return (template, srcurl, parameters, refsettings) + +def brace_substitution(template, subs): + """Substitute subs[x] for '{x}' in template. Unescape {{ to { and + }} to }. Unescaping is not done in substitution keys, i.e. while + scanning for a closing brace after a single opening brace.""" + strbuf = cStringIO.StringIO() + + last_pos = 0 + for match in re.finditer(r'{{?|}}', template): + next_pos = match.start() + if next_pos < last_pos: + continue + + strbuf.write(template[last_pos:next_pos]) + if match.group(0) == '{{': + strbuf.write('{') + last_pos = next_pos+2 + + elif match.group(0) == '}}': + strbuf.write('}') + last_pos = next_pos+2 + + else: # match.group(0) == '{' + key_end = template.find('}', next_pos+1) + if key_end == -1: + strbuf.write(template[next_pos:]) + last_pos = len(template) + break + + try: + strbuf.write(urllib.quote(subs[template[next_pos+1:key_end]])) + except KeyError: + strbuf.write(template[next_pos:key_end+1]) + last_pos = key_end+1 + + strbuf.write(template[last_pos:]) + return strbuf.getvalue() + + +class Request: + DEFAULT_URL_PRIORITY = 50 + + def __init__(self, reference, reqtype): + self.handle = None + self.dl = None + + # state variables + self.xsltfile, self.srcurl, self.xsltparameters, self.processing = \ + parse_reference(reference) + self.type = reqtype + self.status = -1 + self.errmsg = None + self.mediaurls = [] + + # stream information + self.contenttype = 'text/xml' + self.contentlength = -1 + self.streamtitle = '' + + # callbacks + self.writefunc = None + self.writedata = None + self.readfunc = None + self.readdata = None + + def handle_header(self, buf): + namedata = buf.split(':', 1) + if len(namedata) == 2: + headername, headerdata = namedata + if headername.lower() == 'content-type': + # Strip parameters like charset="utf-8" + self.contenttype = headerdata.split(';', 1)[0].strip() + elif headername.lower() == 'content-length': + try: + self.contentlength = int(headerdata.strip()) + except ValueError: + self.contentlength = -1 + + def setup_downloader(self, url, writefunc, headerfunc, donefunc, + HTTPheaders=None, headers_only=False): + try: + self.dl = download.create_downloader(url, + template_path, + writefunc, + headerfunc, + donefunc, + HTTPheaders, + headers_only) + self.dl.start() + except download.DownloaderException, exc: + self.dl = None + if donefunc is not None: + donefunc(exc.code, exc.msg) + + def start(self): + debug('start %s\ntemplate = %s, type = %s\n' + 'parameters = %s, processing = %s' % + (self.srcurl, self.xsltfile, self.type, str(self.xsltparameters), + str(self.processing))) + + if self.type == WebviRequestType.MENU and self.srcurl == 'mainmenu': + self.send_mainmenu() + else: + self.setup_downloader(self.srcurl, None, + self.handle_header, + self.finished_apply_xslt, + self.processing['HTTP-headers']) + + def stop(self): + if self.dl is not None: + debug("aborting") + self.dl.abort() + + def start_download(self, url=None): + """Initialize a download. + + If url is None, pop the first URL out of self.mediaurls. If + URL is an ASX playlist, read the content URL from it and start + to download the actual content. + """ + while url is None or url == '': + try: + url = self.mediaurls.pop(0) + except IndexError: + self.request_done(406, 'No more URLs left') + + debug('Start_download ' + url) + + # reset stream status + self.contenttype = 'text/xml' + self.contentlength = -1 + + if self.is_asx_playlist(url): + self.setup_downloader(url, None, + self.handle_header, + self.finished_playlist_loaded, + self.processing['HTTP-headers']) + + else: + self.setup_downloader(url, self.writewrapper, + self.handle_header, + self.finished_download, + self.processing['HTTP-headers']) + + def check_and_send_url(self, url=None): + """Check if the target exists (currently only for HTTP URLs) + before relaying the URL to the client.""" + while url is None or url == '': + try: + url = self.mediaurls.pop(0) + except IndexError: + self.request_done(406, 'No more URLs left') + return + + debug('check_and_send_url ' + str(url)) + + if self.is_asx_playlist(url): + self.setup_downloader(url, None, self.handle_header, + self.finished_playlist_loaded, + self.processing['HTTP-headers']) + elif url.startswith('http://') or url.startswith('https://'): + self.checking_url = url + self.setup_downloader(url, None, None, + self.finished_check_url, + self.processing['HTTP-headers'], True) + else: + self.writewrapper(url) + self.request_done(0, None) + + def send_mainmenu(self): + """Build the XML main menu from the module description files + in the hard drive. + """ + if not os.path.isdir(template_path): + self.request_done(404, "Can't access service directory %s" % + template_path) + return + + debug('Reading XSLT templates from ' + template_path) + + # Find menu items in the service.xml files in the subdirectories + menuitems = {} + for f in os.listdir(template_path): + if f == 'bin': + continue + + filename = os.path.join(template_path, f, 'service.xml') + try: + doc = libxml2.parseFile(filename) + except libxml2.parserError: + debug("Failed to parse " + filename); + continue + + title = '' + url = '' + + root = doc.getRootElement() + if (root is None) or (root.name != 'service'): + debug("Root node is not 'service' in " + filename); + doc.freeDoc() + continue + node = root.children + while node is not None: + if node.name == 'title': + title = utils.get_content_unicode(node) + elif node.name == 'ref': + url = utils.get_content_unicode(node) + node = node.next + doc.freeDoc() + + if (title == '') or (url == ''): + debug("Empty <title> or <ref> in " + filename); + continue + + menuitems[title.lower()] = ('<link>\n' + '<label>%s</label>\n' + '<ref>%s</ref>\n' + '</link>\n' % + (libxml2.newText(title), + libxml2.newText(url))) + # Sort the menu items + titles = menuitems.keys() + titles.sort() + + # Build the menu + mainmenu = ('<?xml version="1.0"?>\n' + '<wvmenu>\n' + '<title>Select video source</title>\n') + for t in titles: + mainmenu += menuitems[t] + mainmenu += '</wvmenu>' + + self.dl = download.DummyDownloader(mainmenu, + writefunc=self.writewrapper, + donefunc=self.request_done) + self.dl.start() + + def writewrapper(self, inp): + """Wraps pycurl write callback (with the data as the only + parameter) into webvi write callback (with signature (data, + length, usertag)). If self.writefunc is not set, write to + stdout.""" + if self.writefunc is not None: + inplen = len(inp) + written = self.writefunc(inp, inplen, self.writedata) + if written != inplen: + self.dl.close() + self.request_done(405, 'Write callback failed') + else: + sys.stdout.write(inp) + + def is_asx_playlist(self, url): + if utils.get_url_extension(url).lower() == 'asx': + return True + else: + return False + + def get_url_from_asx(self, asx, asxurl): + """Simple ASX parser. Return the content of the first <ref> + tag.""" + try: + doc = libxml2.htmlReadDoc(asx, asxurl, None, + libxml2.HTML_PARSE_NOERROR | + libxml2.HTML_PARSE_NOWARNING | + libxml2.HTML_PARSE_NONET) + except libxml2.treeError: + debug('Can\'t parse ASX:\n' + asx) + return None + root = doc.getRootElement() + ret = self._get_ref_recursive(root).strip() + doc.freeDoc() + return ret + + def _get_ref_recursive(self, node): + if node is None: + return None + if node.name.lower() == 'ref': + href = node.prop('href') + if href is not None: + return href + child = node.children + while child: + res = self._get_ref_recursive(child) + if res is not None: + return res + child = child.next + return None + + def parse_mediaurl(self, xml, minpriority, maxpriority): + debug('parse_mediaurl\n' + xml) + + self.streamtitle = '???' + mediaurls = [] + + try: + doc = libxml2.parseDoc(xml) + except libxml2.parserError: + debug('Invalid XML') + return mediaurls + + root = doc.getRootElement() + if root is None: + debug('No root node') + return mediaurls + + urls_and_priorities = [] + node = root.children + while node: + if node.name == 'title': + self.streamtitle = utils.get_content_unicode(node) + elif node.name == 'url': + try: + priority = int(node.prop('priority')) + except (ValueError, TypeError): + priority = self.DEFAULT_URL_PRIORITY + + content = node.getContent() + if priority >= minpriority and priority <= maxpriority and content != '': + urls_and_priorities.append((priority, content)) + node = node.next + doc.freeDoc() + + urls_and_priorities.sort() + urls_and_priorities.reverse() + mediaurls = [b[1] for b in urls_and_priorities] + + return mediaurls + + def finished_download(self, err, errmsg): + if err == 0: + self.request_done(0, None) + elif err != 402 and self.mediaurls: + debug('Download failed (%s %s).\nTrying the next one.' % (err, errmsg)) + self.dl = None + self.start_download() + else: + self.request_done(err, errmsg) + + def finished_playlist_loaded(self, err, errmsg): + if err == 0: + url = self.get_url_from_asx(self.dl.get_body(), + self.dl.get_url()) + if url is None: + err = 404 + errmsg = 'No ref tag in ASX file' + else: + if not self.is_asx_playlist(url) and url.startswith('http:'): + # The protocol is really "Windows Media HTTP + # Streaming Protocol", not plain HTTP, even though + # the scheme in the ASX file says "http://". We + # can't do MS-WMSP but luckily most MS-WMSP + # servers support MMS, too. + url = 'mms:' + url[5:] + + if self.type == WebviRequestType.STREAMURL: + self.check_and_send_url(url) + else: + self.start_download(url) + + if err != 0: + if not self.mediaurls: + self.request_done(err, errmsg) + else: + if self.type == WebviRequestType.STREAMURL: + self.check_and_send_url() + else: + self.start_download() + + def finished_apply_xslt(self, err, errmsg): + if err != 0: + self.request_done(err, errmsg) + return + + url = self.srcurl + + # Add input documentURL to the parameters + params = self.xsltparameters.copy() + params['docurl'] = "'" + url + "'" + + minpriority = self.processing.get('minquality', 0) + maxpriority = self.processing.get('maxquality', 100) + + xsltpath = os.path.join(template_path, self.xsltfile) + + # Check that xsltpath is inside the template directory + if os.path.commonprefix([template_path, os.path.realpath(xsltpath)]) != template_path: + self.request_done(503, 'Insecure template path') + return + + xml = self.dl.get_body() + encoding = self.dl.get_encoding() + + if self.processing.has_key('postprocess') and \ + 'json2xml' in self.processing['postprocess']: + xmldoc = json2xml.json2xml(xml, encoding) + if xmldoc is None: + self.request_done(503, 'Invalid JSON content') + return + xml = xmldoc.serialize('utf-8') + encoding = 'utf-8' + + #debug(xml) + + resulttree = utils.apply_xslt(xml, encoding, url, + xsltpath, params) + if resulttree is None: + self.request_done(503, 'XSLT transformation failed') + return + + if self.type == WebviRequestType.MENU: + debug("result:") + debug(resulttree) + self.writewrapper(resulttree) + self.request_done(0, None) + elif self.type == WebviRequestType.STREAMURL: + self.mediaurls = self.parse_mediaurl(resulttree, minpriority, maxpriority) + if self.mediaurls: + self.check_and_send_url() + else: + self.request_done(406, 'No valid URLs found') + elif self.type == WebviRequestType.FILE: + self.mediaurls = self.parse_mediaurl(resulttree, minpriority, maxpriority) + if self.mediaurls: + self.start_download() + else: + self.request_done(406, 'No valid URLs found') + else: + self.request_done(0, None) + + def finished_extract_playlist_url(self, err, errmsg): + if err == 0: + url = self.get_url_from_asx(self.dl.get_body(), + self.dl.get_url()) + if url is not None: + if self.is_asx_playlist(url): + self.setup_downloader(url, None, None, + self.finished_extract_playlist_url, + self.processing['HTTP-headers']) + else: + if url.startswith('http:'): + url = 'mms:' + url[5:] + self.check_and_send_url(url) + else: + self.request_done(503, 'XSLT tranformation failed to produce URL') + else: + self.request_done(err, errmsg) + + + def finished_check_url(self, err, errmsg): + if err == 0: + self.writewrapper(self.checking_url) + self.request_done(0, None) + else: + self.check_and_send_url() + + def request_done(self, err, errmsg): + debug('request_done: %d %s' % (err, errmsg)) + + self.status = err + self.errmsg = errmsg + self.dl = None + + def is_finished(self): + return self.status >= 0 + + +class RequestList(dict): + nextreqnum = 1 + + def put(self, req): + reqnum = RequestList.nextreqnum + RequestList.nextreqnum += 1 + req.handle = reqnum + self[reqnum] = req + return reqnum |