summaryrefslogtreecommitdiff
path: root/src/libwebvi/webvi/request.py
diff options
context:
space:
mode:
Diffstat (limited to 'src/libwebvi/webvi/request.py')
-rw-r--r--src/libwebvi/webvi/request.py617
1 files changed, 617 insertions, 0 deletions
diff --git a/src/libwebvi/webvi/request.py b/src/libwebvi/webvi/request.py
new file mode 100644
index 0000000..e19eb9c
--- /dev/null
+++ b/src/libwebvi/webvi/request.py
@@ -0,0 +1,617 @@
+# request.py - webvi request class
+#
+# Copyright (c) 2009, 2010 Antti Ajanki <antti.ajanki@iki.fi>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+import urllib
+import libxml2
+import os.path
+import cStringIO
+import re
+import download
+import sys
+import utils
+import json2xml
+from constants import WebviRequestType
+
# When true, debug() writes diagnostic messages to stderr.
DEBUG = False

# Template directory used until set_template_path() selects another one.
DEFAULT_TEMPLATE_PATH = '/usr/local/share/webvi/templates'
# Directory currently searched for XSLT templates and service.xml files.
template_path = DEFAULT_TEMPLATE_PATH
+
def debug(msg):
    """Write msg followed by a newline to stderr when DEBUG is enabled.

    Unicode messages (including instances of unicode subclasses) are
    encoded as ASCII with unrepresentable characters replaced; any
    other message is written unchanged.
    """
    if not DEBUG:
        return

    if isinstance(msg, unicode):
        msg = msg.encode('ascii', 'replace')
    sys.stderr.write(msg)
    sys.stderr.write('\n')
+
def set_template_path(path):
    """Select the directory that is searched for templates.

    The module-level template_path is set to the canonical
    (os.path.realpath) form of path, or of DEFAULT_TEMPLATE_PATH when
    path is None.
    """
    global template_path

    chosen = DEFAULT_TEMPLATE_PATH if path is None else path
    template_path = os.path.realpath(chosen)

    debug("set_template_path " + template_path)
+
def _xpath_string_literal(value):
    """Return value written as an XPath 1.0 string expression."""
    if "'" not in value:
        return "'" + value + "'"
    if '"' not in value:
        return '"' + value + '"'
    # XPath 1.0 string literals have no escape syntax, so a value that
    # contains both kinds of quotes is assembled with concat() from
    # single-quoted pieces joined by a double-quoted apostrophe.
    pieces = ["'" + piece + "'" for piece in value.split("'")]
    return 'concat(' + ', "\'", '.join(pieces) + ')'


def parse_reference(reference):
    """Parses URLs of the following form:

    wvt:///youtube/video.xsl?srcurl=http%3A%2F%2Fwww.youtube.com%2F&param=name1,value1&param=name2,value2

    reference is assumed to be URL-encoded UTF-8 string.

    Returns (template, srcurl, params, processing_instructions) where
    template is the URL path name (the part before ?), srcurl is the
    parameter called srcurl (after applying any subst=key,value brace
    substitutions), and params is a dictionary of (lower-cased name,
    quoted-value) pairs extracted from param parameters. Parameter
    values are written as XPath string expressions so that the xslt
    parser handles them as strings, even when they contain quotes.
    processing_instructions is dictionary of options that affect the
    further processing of the data; it always contains the key
    'HTTP-headers' and may contain 'minquality', 'maxquality',
    'postprocess' and 'overridecontenttype'.

    Returns (None, None, None, None) if reference can not be converted
    to str or does not start with 'wvt:///'.
    """
    try:
        reference = str(reference)
    except UnicodeEncodeError:
        return (None, None, None, None)

    prefix = 'wvt:///'
    if not reference.startswith(prefix):
        return (None, None, None, None)

    srcurl = ''
    parameters = {}
    substitutions = {}
    refsettings = {'HTTP-headers': {}}

    fields = reference[len(prefix):].split('?', 1)
    template = fields[0]
    if len(fields) == 1:
        return (template, srcurl, parameters, refsettings)

    for par in fields[1].split('&'):
        paramfields = par.split('=', 1)
        # Option names are matched case-insensitively.
        key = paramfields[0].lower()

        if len(paramfields) == 2:
            value = urllib.unquote(paramfields[1])
        else:
            value = ''

        if key == 'srcurl':
            srcurl = value

        elif key == 'param':
            namevalue = value.split(',', 1)
            pname = namevalue[0].lower()
            if len(namevalue) == 2:
                parameters[pname] = _xpath_string_literal(namevalue[1])
            else:
                parameters[pname] = "''"

        elif key == 'subst':
            substfields = value.split(',', 1)
            if len(substfields) == 2:
                substitutions[substfields[0]] = substfields[1]

        elif key in ('minquality', 'maxquality'):
            # Non-numeric quality limits are ignored.
            try:
                refsettings[key] = int(value)
            except ValueError:
                pass

        elif key == 'postprocess':
            refsettings.setdefault('postprocess', []).append(value)

        elif key == 'contenttype':
            refsettings['overridecontenttype'] = value

        elif key == 'http-header':
            headerfields = value.split(',', 1)
            if len(headerfields) != 2:
                # A header without data is skipped.
                continue
            refsettings['HTTP-headers'][headerfields[0]] = headerfields[1]

    if substitutions:
        srcurl = brace_substitution(srcurl, substitutions)

    return (template, srcurl, parameters, refsettings)
+
# Matches the tokens handled by brace_substitution: '{{', '{' or '}}'.
_BRACE_TOKEN_RE = re.compile(r'{{?|}}')


def brace_substitution(template, subs):
    """Substitute subs[x] for '{x}' in template. Unescape {{ to { and
    }} to }. Unescaping is not done in substitution keys, i.e. while
    scanning for a closing brace after a single opening brace.

    Substituted values are passed through urllib.quote. A '{x}' whose
    key is not in subs is copied unchanged, and so is everything after
    an opening brace that is never closed.
    """
    pieces = []
    pos = 0

    while True:
        # Search again from the end of the previously consumed token so
        # that a '}}' directly after a key's closing brace is still
        # recognised (e.g. '{x}}}' is '{x}' followed by an escaped '}').
        match = _BRACE_TOKEN_RE.search(template, pos)
        if match is None:
            break

        start = match.start()
        token = match.group(0)
        pieces.append(template[pos:start])

        if token == '{{':
            pieces.append('{')
            pos = start + 2

        elif token == '}}':
            pieces.append('}')
            pos = start + 2

        else:  # token == '{'
            key_end = template.find('}', start + 1)
            if key_end == -1:
                # Unterminated key: keep the rest of the template as is.
                pieces.append(template[start:])
                pos = len(template)
                break

            try:
                raw = subs[template[start + 1:key_end]]
                pieces.append(urllib.quote(raw))
            except KeyError:
                pieces.append(template[start:key_end + 1])
            pos = key_end + 1

    pieces.append(template[pos:])
    return ''.join(pieces)
+
+
class Request:
    """State of one webvi request.

    A request is created from a wvt:/// reference (see
    parse_reference) and a request type. start() either builds the
    main menu or downloads the source URL and hands the result to
    finished_apply_xslt, which applies the XSLT template and then,
    depending on the type, writes the menu, relays a stream URL or
    downloads the media file. The outcome is stored in status and
    errmsg by request_done.
    """

    # Priority given to <url> elements without a usable priority attribute
    DEFAULT_URL_PRIORITY = 50

    def __init__(self, reference, reqtype):
        """Parse reference and initialize the request state.

        reqtype is compared against the WebviRequestType constants by
        the other methods.
        """
        self.handle = None
        self.dl = None

        # state variables
        (self.xsltfile, self.srcurl, self.xsltparameters,
         self.processing) = parse_reference(reference)
        self.type = reqtype
        self.status = -1    # negative until request_done is called
        self.errmsg = None
        self.mediaurls = []
        # URL being verified by check_and_send_url/finished_check_url
        self.checking_url = None

        # stream information
        self.contenttype = 'text/xml'
        self.contentlength = -1
        self.streamtitle = ''

        # callbacks
        self.writefunc = None
        self.writedata = None
        self.readfunc = None
        self.readdata = None

    def handle_header(self, buf):
        """Header callback: record Content-Type and Content-Length.

        buf is one 'name: value' header line. Lines without a colon
        and other headers are ignored. An unparsable Content-Length
        resets contentlength to -1.
        """
        namedata = buf.split(':', 1)
        if len(namedata) != 2:
            return

        headername = namedata[0].lower()
        headerdata = namedata[1]
        if headername == 'content-type':
            # Strip parameters like charset="utf-8"
            self.contenttype = headerdata.split(';', 1)[0].strip()
        elif headername == 'content-length':
            try:
                self.contentlength = int(headerdata.strip())
            except ValueError:
                self.contentlength = -1

    def setup_downloader(self, url, writefunc, headerfunc, donefunc,
                         HTTPheaders=None, headers_only=False):
        """Create a downloader for url, store it in self.dl and start it.

        If download.DownloaderException is raised, self.dl is reset to
        None and donefunc (when not None) is called with the
        exception's code and msg.
        """
        try:
            self.dl = download.create_downloader(url,
                                                 template_path,
                                                 writefunc,
                                                 headerfunc,
                                                 donefunc,
                                                 HTTPheaders,
                                                 headers_only)
            self.dl.start()
        except download.DownloaderException as exc:
            self.dl = None
            if donefunc is not None:
                donefunc(exc.code, exc.msg)

    def start(self):
        """Begin processing the request.

        A MENU request whose source URL is 'mainmenu' is answered with
        the locally built main menu. Otherwise the source URL is
        downloaded and finished_apply_xslt is called on completion.
        """
        debug('start %s\ntemplate = %s, type = %s\n'
              'parameters = %s, processing = %s' %
              (self.srcurl, self.xsltfile, self.type,
               str(self.xsltparameters), str(self.processing)))

        if self.type == WebviRequestType.MENU and self.srcurl == 'mainmenu':
            self.send_mainmenu()
        else:
            self.setup_downloader(self.srcurl, None,
                                  self.handle_header,
                                  self.finished_apply_xslt,
                                  self.processing['HTTP-headers'])

    def stop(self):
        """Abort the current downloader, if there is one."""
        if self.dl is not None:
            debug("aborting")
            self.dl.abort()

    def start_download(self, url=None):
        """Initialize a download.

        If url is None or empty, pop the first non-empty URL out of
        self.mediaurls; when none is left the request is finished with
        error 406. If URL is an ASX playlist, read the content URL
        from it and start to download the actual content.
        """
        while url is None or url == '':
            try:
                url = self.mediaurls.pop(0)
            except IndexError:
                self.request_done(406, 'No more URLs left')
                # Without this return the loop would never terminate.
                return

        debug('Start_download ' + url)

        # reset stream status
        self.contenttype = 'text/xml'
        self.contentlength = -1

        if self.is_asx_playlist(url):
            self.setup_downloader(url, None,
                                  self.handle_header,
                                  self.finished_playlist_loaded,
                                  self.processing['HTTP-headers'])
        else:
            self.setup_downloader(url, self.writewrapper,
                                  self.handle_header,
                                  self.finished_download,
                                  self.processing['HTTP-headers'])

    def check_and_send_url(self, url=None):
        """Check if the target exists (currently only for HTTP URLs)
        before relaying the URL to the client.

        If url is None or empty, the first non-empty URL is popped out
        of self.mediaurls; when none is left the request is finished
        with error 406. ASX playlists are downloaded and resolved
        first. URLs with other schemes are written to the client
        without checking.
        """
        while url is None or url == '':
            try:
                url = self.mediaurls.pop(0)
            except IndexError:
                self.request_done(406, 'No more URLs left')
                return

        debug('check_and_send_url ' + str(url))

        if self.is_asx_playlist(url):
            self.setup_downloader(url, None, self.handle_header,
                                  self.finished_playlist_loaded,
                                  self.processing['HTTP-headers'])
        elif url.startswith(('http://', 'https://')):
            # Request only the headers; finished_check_url relays
            # self.checking_url if the check succeeds.
            self.checking_url = url
            self.setup_downloader(url, None, None,
                                  self.finished_check_url,
                                  self.processing['HTTP-headers'], True)
        else:
            self.writewrapper(url)
            self.request_done(0, None)

    def send_mainmenu(self):
        """Build the XML main menu from the service.xml files found in
        the subdirectories of the template directory and send it
        through a DummyDownloader.

        The entry called 'bin' is skipped, as are service files that
        can not be parsed, whose root element is not <service>, or
        that lack a <title> or <ref>. Menu items are ordered by
        lower-cased title. Finishes the request with error 404 if the
        template directory does not exist.
        """
        if not os.path.isdir(template_path):
            self.request_done(404, "Can't access service directory %s" %
                              template_path)
            return

        debug('Reading XSLT templates from ' + template_path)

        # Find menu items in the service.xml files in the subdirectories
        menuitems = {}
        for entry in os.listdir(template_path):
            if entry == 'bin':
                continue

            filename = os.path.join(template_path, entry, 'service.xml')
            try:
                doc = libxml2.parseFile(filename)
            except libxml2.parserError:
                debug("Failed to parse " + filename)
                continue

            title = ''
            url = ''

            root = doc.getRootElement()
            if (root is None) or (root.name != 'service'):
                debug("Root node is not 'service' in " + filename)
                doc.freeDoc()
                continue

            node = root.children
            while node is not None:
                if node.name == 'title':
                    title = utils.get_content_unicode(node)
                elif node.name == 'ref':
                    url = utils.get_content_unicode(node)
                node = node.next
            doc.freeDoc()

            if (title == '') or (url == ''):
                debug("Empty <title> or <ref> in " + filename)
                continue

            menuitems[title.lower()] = ('<link>\n'
                                        '<label>%s</label>\n'
                                        '<ref>%s</ref>\n'
                                        '</link>\n' %
                                        (libxml2.newText(title),
                                         libxml2.newText(url)))

        # Build the menu with the items sorted by title
        mainmenu = ('<?xml version="1.0"?>\n'
                    '<wvmenu>\n'
                    '<title>Select video source</title>\n')
        for key in sorted(menuitems):
            mainmenu += menuitems[key]
        mainmenu += '</wvmenu>'

        self.dl = download.DummyDownloader(mainmenu,
                                           writefunc=self.writewrapper,
                                           donefunc=self.request_done)
        self.dl.start()

    def writewrapper(self, inp):
        """Wraps pycurl write callback (with the data as the only
        parameter) into webvi write callback (with signature (data,
        length, usertag)). If self.writefunc is not set, write to
        stdout.

        If the callback reports a length different from len(inp), the
        downloader (if any) is closed and the request is finished with
        error 405.
        """
        if self.writefunc is None:
            sys.stdout.write(inp)
            return

        inplen = len(inp)
        written = self.writefunc(inp, inplen, self.writedata)
        if written != inplen:
            # This method is also called directly when no downloader
            # is active, so self.dl may be None here.
            if self.dl is not None:
                self.dl.close()
            self.request_done(405, 'Write callback failed')

    def is_asx_playlist(self, url):
        """Return True if the extension of url is 'asx' (in any case)."""
        return utils.get_url_extension(url).lower() == 'asx'

    def get_url_from_asx(self, asx, asxurl):
        """Simple ASX parser. Return the stripped href of the first
        <ref> tag, or None if the document can not be parsed or does
        not contain a <ref> with an href."""
        try:
            doc = libxml2.htmlReadDoc(asx, asxurl, None,
                                      libxml2.HTML_PARSE_NOERROR |
                                      libxml2.HTML_PARSE_NOWARNING |
                                      libxml2.HTML_PARSE_NONET)
        except libxml2.treeError:
            debug('Can\'t parse ASX:\n' + asx)
            return None

        try:
            href = self._get_ref_recursive(doc.getRootElement())
        finally:
            doc.freeDoc()

        # Callers test the result against None, so a missing <ref>
        # must not fail on the strip() call.
        if href is None:
            return None
        return href.strip()

    def _get_ref_recursive(self, node):
        """Depth-first search below node for a 'ref' element that has
        an href attribute. Return the attribute value, or None."""
        if node is None:
            return None
        if node.name.lower() == 'ref':
            href = node.prop('href')
            if href is not None:
                return href
        child = node.children
        while child:
            res = self._get_ref_recursive(child)
            if res is not None:
                return res
            child = child.next
        return None

    def parse_mediaurl(self, xml, minpriority, maxpriority):
        """Extract media URLs from the XML document xml.

        The children of the root element are scanned: <title> sets
        self.streamtitle and each non-empty <url> whose priority
        attribute (DEFAULT_URL_PRIORITY when missing or not a number)
        lies within [minpriority, maxpriority] is collected.

        Returns the URLs in descending (priority, url) order, or an
        empty list if xml can not be parsed or has no root element.
        """
        debug('parse_mediaurl\n' + xml)

        self.streamtitle = '???'

        try:
            doc = libxml2.parseDoc(xml)
        except libxml2.parserError:
            debug('Invalid XML')
            return []

        root = doc.getRootElement()
        if root is None:
            debug('No root node')
            doc.freeDoc()
            return []

        urls_and_priorities = []
        node = root.children
        while node:
            if node.name == 'title':
                self.streamtitle = utils.get_content_unicode(node)
            elif node.name == 'url':
                try:
                    priority = int(node.prop('priority'))
                except (ValueError, TypeError):
                    priority = self.DEFAULT_URL_PRIORITY

                content = node.getContent()
                if minpriority <= priority <= maxpriority and content != '':
                    urls_and_priorities.append((priority, content))
            node = node.next
        doc.freeDoc()

        return [item[1] for item in sorted(urls_and_priorities, reverse=True)]

    def finished_download(self, err, errmsg):
        """Done callback of a media download.

        On failure the next URL in self.mediaurls is tried, unless the
        error code is 402 or no URLs are left, in which case the
        request is finished with the error.
        """
        if err == 0:
            self.request_done(0, None)
        elif err != 402 and self.mediaurls:
            debug('Download failed (%s %s).\nTrying the next one.' % (err, errmsg))
            self.dl = None
            self.start_download()
        else:
            self.request_done(err, errmsg)

    def finished_playlist_loaded(self, err, errmsg):
        """Done callback of an ASX playlist download.

        Extracts the first reference from the playlist and continues
        with it (check_and_send_url for STREAMURL requests,
        start_download otherwise). If loading or parsing failed, falls
        back to the next URL in self.mediaurls or finishes the request
        with the error when none is left.
        """
        if err == 0:
            url = self.get_url_from_asx(self.dl.get_body(),
                                        self.dl.get_url())
            if url is None:
                err = 404
                errmsg = 'No ref tag in ASX file'
            else:
                if not self.is_asx_playlist(url) and url.startswith('http:'):
                    # The protocol is really "Windows Media HTTP
                    # Streaming Protocol", not plain HTTP, even though
                    # the scheme in the ASX file says "http://". We
                    # can't do MS-WMSP but luckily most MS-WMSP
                    # servers support MMS, too.
                    url = 'mms:' + url[5:]

                if self.type == WebviRequestType.STREAMURL:
                    self.check_and_send_url(url)
                else:
                    self.start_download(url)

        if err != 0:
            if not self.mediaurls:
                self.request_done(err, errmsg)
            elif self.type == WebviRequestType.STREAMURL:
                self.check_and_send_url()
            else:
                self.start_download()

    def finished_apply_xslt(self, err, errmsg):
        """Done callback of the source page download.

        Applies the XSLT template to the downloaded document
        (converting it from JSON first if the 'json2xml' postprocess
        option was given) and acts on the result according to the
        request type: MENU results are written to the client,
        STREAMURL and FILE results are parsed for media URLs which are
        then checked or downloaded. Failures finish the request with
        the download error, 503 or 406.
        """
        if err != 0:
            self.request_done(err, errmsg)
            return

        url = self.srcurl

        # Add input documentURL to the parameters
        params = self.xsltparameters.copy()
        params['docurl'] = "'" + url + "'"

        minpriority = self.processing.get('minquality', 0)
        maxpriority = self.processing.get('maxquality', 100)

        xsltpath = os.path.join(template_path, self.xsltfile)

        # Check that xsltpath is inside the template directory. Both
        # paths are resolved and compared up to a path separator;
        # os.path.commonprefix works character by character and would
        # accept a sibling such as "<templates>-evil/x.xsl".
        allowed_prefix = os.path.join(os.path.realpath(template_path), '')
        if not os.path.realpath(xsltpath).startswith(allowed_prefix):
            self.request_done(503, 'Insecure template path')
            return

        xml = self.dl.get_body()
        encoding = self.dl.get_encoding()

        if 'json2xml' in self.processing.get('postprocess', []):
            xmldoc = json2xml.json2xml(xml, encoding)
            if xmldoc is None:
                self.request_done(503, 'Invalid JSON content')
                return
            xml = xmldoc.serialize('utf-8')
            encoding = 'utf-8'

        resulttree = utils.apply_xslt(xml, encoding, url,
                                      xsltpath, params)
        if resulttree is None:
            self.request_done(503, 'XSLT transformation failed')
            return

        if self.type == WebviRequestType.MENU:
            debug("result:")
            debug(resulttree)
            self.writewrapper(resulttree)
            self.request_done(0, None)
        elif self.type == WebviRequestType.STREAMURL:
            self.mediaurls = self.parse_mediaurl(resulttree, minpriority,
                                                 maxpriority)
            if self.mediaurls:
                self.check_and_send_url()
            else:
                self.request_done(406, 'No valid URLs found')
        elif self.type == WebviRequestType.FILE:
            self.mediaurls = self.parse_mediaurl(resulttree, minpriority,
                                                 maxpriority)
            if self.mediaurls:
                self.start_download()
            else:
                self.request_done(406, 'No valid URLs found')
        else:
            self.request_done(0, None)

    def finished_extract_playlist_url(self, err, errmsg):
        """Done callback that resolves (possibly nested) ASX playlists
        and passes the final URL to check_and_send_url.

        Finishes the request with the download error, or with 503 if
        the playlist contains no reference.
        """
        if err != 0:
            self.request_done(err, errmsg)
            return

        url = self.get_url_from_asx(self.dl.get_body(),
                                    self.dl.get_url())
        if url is None:
            self.request_done(503, 'XSLT tranformation failed to produce URL')
        elif self.is_asx_playlist(url):
            self.setup_downloader(url, None, None,
                                  self.finished_extract_playlist_url,
                                  self.processing['HTTP-headers'])
        else:
            if url.startswith('http:'):
                url = 'mms:' + url[5:]
            self.check_and_send_url(url)

    def finished_check_url(self, err, errmsg):
        """Done callback of the existence check started by
        check_and_send_url: relay the checked URL on success,
        otherwise try the next URL."""
        if err == 0:
            self.writewrapper(self.checking_url)
            self.request_done(0, None)
        else:
            self.check_and_send_url()

    def request_done(self, err, errmsg):
        """Record the final status and error message of the request
        and drop the reference to the downloader."""
        debug('request_done: %d %s' % (err, errmsg))

        self.status = err
        self.errmsg = errmsg
        self.dl = None

    def is_finished(self):
        """Return True once request_done has stored a non-negative
        status."""
        return self.status >= 0
+
+
class RequestList(dict):
    """Dictionary of requests keyed by their integer handle.

    The handle counter is a class attribute, so handles are unique
    across all RequestList instances.
    """
    nextreqnum = 1

    def put(self, req):
        """Assign the next free handle to req, store req under that
        handle and return the handle."""
        handle = RequestList.nextreqnum
        RequestList.nextreqnum = handle + 1

        req.handle = handle
        self[handle] = req
        return handle