diff options
Diffstat (limited to 'src/libwebvi')
-rw-r--r-- | src/libwebvi/Makefile | 34 | ||||
-rw-r--r-- | src/libwebvi/libwebvi.c | 814 | ||||
-rw-r--r-- | src/libwebvi/libwebvi.h | 330 | ||||
-rwxr-xr-x | src/libwebvi/pythonlibname.py | 14 | ||||
-rw-r--r-- | src/libwebvi/webvi/__init__.py | 1 | ||||
-rw-r--r-- | src/libwebvi/webvi/api.py | 289 | ||||
-rw-r--r-- | src/libwebvi/webvi/asyncurl.py | 389 | ||||
-rw-r--r-- | src/libwebvi/webvi/constants.py | 50 | ||||
-rw-r--r-- | src/libwebvi/webvi/download.py | 470 | ||||
-rw-r--r-- | src/libwebvi/webvi/json2xml.py | 69 | ||||
-rw-r--r-- | src/libwebvi/webvi/request.py | 617 | ||||
-rw-r--r-- | src/libwebvi/webvi/utils.py | 134 | ||||
-rw-r--r-- | src/libwebvi/webvi/version.py | 20 |
13 files changed, 3231 insertions, 0 deletions
diff --git a/src/libwebvi/Makefile b/src/libwebvi/Makefile new file mode 100644 index 0000000..131c4a7 --- /dev/null +++ b/src/libwebvi/Makefile @@ -0,0 +1,34 @@ +PREFIX ?= /usr/local + +LIBNAME=libwebvi.so +LIBSONAME=$(LIBNAME).0 +LIBMINOR=$(LIBSONAME).4 + +VERSION:=$(shell cat ../version) +PYLIB:=$(shell python pythonlibname.py) +DEFINES:=-DPYTHONSHAREDLIB=\"$(PYLIB)\" -DLIBWEBVI_VERSION=\"$(VERSION)\" +# append -DDEBUG to DEFINES to get debug output + +all: $(LIBMINOR) + +libwebvi.o: libwebvi.c libwebvi.h + $(CC) -fPIC -Wall -O2 -g $(CFLAGS) $(DEFINES) `python-config --cflags` -c -o libwebvi.o libwebvi.c + +$(LIBMINOR): libwebvi.o + $(CC) -shared -Wl,-soname,$(LIBSONAME) -Wl,--as-needed libwebvi.o `python-config --ldflags` -o $(LIBMINOR) + ln -sf $(LIBMINOR) $(LIBSONAME) + ln -sf $(LIBSONAME) $(LIBNAME) + +libwebvi.a: libwebvi.o + ar rsc libwebvi.a libwebvi.o + +clean: + rm -f *.o *~ libwebvi.so* libwebvi.a + rm -f webvi/*.pyc webvi/*~ + +install: $(LIBMINOR) + mkdir -p $(PREFIX)/lib + cp --remove-destination -d $(LIBNAME)* $(PREFIX)/lib + /sbin/ldconfig $(PREFIX)/lib + +.PHONY: clean install diff --git a/src/libwebvi/libwebvi.c b/src/libwebvi/libwebvi.c new file mode 100644 index 0000000..c4d9aed --- /dev/null +++ b/src/libwebvi/libwebvi.c @@ -0,0 +1,814 @@ +/* + * libwebvi.c: C bindings for webvi Python module + * + * Copyright (c) 2010 Antti Ajanki <antti.ajanki@iki.fi> + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <Python.h> +#include <stdio.h> +#include <dlfcn.h> + +#include "libwebvi.h" + +static const char *VERSION = "libwebvi/" LIBWEBVI_VERSION; + +static const int MAX_ERROR_MESSAGE_LENGTH = 512; +static const int MAX_MSG_STRING_LENGTH = 512; + +static PyThreadState *main_state = NULL; + +typedef struct per_interpreter_data_t { + PyThreadState *interp; + PyObject *webvi_module; + char *last_error; + WebviMsg latest_message; +} per_interpreter_data; + +#ifdef DEBUG + +#define debug(x...) fprintf(stderr, x) +#define handle_pyerr() { if (PyErr_Occurred()) { PyErr_Print(); } } + +#else + +#define debug(x...) +#define handle_pyerr() PyErr_Clear() + +#endif + + +/********************************************************************** + * + * Internal functions + */ + +static PyObject *call_python(PyObject *webvi_module, + const char *funcname, + PyObject *args) { + PyObject *func, *val = NULL; + +#ifdef DEBUG + debug("call_python %s ", funcname); + if (PyObject_Print(args, stderr, 0) == -1) + debug("<print failed>"); + debug("\n"); +#endif + + func = PyObject_GetAttrString(webvi_module, funcname); + if (func) { + val = PyObject_CallObject(func, args); + + Py_DECREF(func); + } + + return val; +} + +static long set_callback(PyObject *webvi_module, WebviHandle h, + WebviOption callbacktype, + webvi_callback callback, + PyObject *prototype) { + long res = WEBVIERR_UNKNOWN_ERROR; + + if (prototype && PyCallable_Check(prototype)) { + PyObject *args = Py_BuildValue("(l)", (long)callback); + PyObject *val = PyObject_CallObject(prototype, args); + Py_DECREF(args); + + if (val) { + PyObject *webvihandle = PyInt_FromLong(h); + PyObject *option = PyInt_FromLong(callbacktype); + PyObject *args2 = PyTuple_Pack(3, webvihandle, option, val); + PyObject *retval = call_python(webvi_module, "set_opt", args2); + Py_DECREF(args2); + Py_DECREF(option); + Py_DECREF(webvihandle); + Py_DECREF(val); + + if (retval) { + if (PyInt_Check(retval)) + res = PyInt_AsLong(retval); + Py_DECREF(retval); + } + } + } + + if (res == WEBVIERR_UNKNOWN_ERROR) + handle_pyerr(); + + return res; +} + +/* + * Converts PyInt to WebviResult. + * + * If intobject is NULL, assumes that a Python exception has occurred. + */ +static WebviResult pyint_as_webviresult(PyObject *intobject) { + if (intobject && PyInt_Check(intobject)) + return PyInt_AsLong(intobject); + + handle_pyerr(); + + return WEBVIERR_UNKNOWN_ERROR; +} + +/* + * Duplicate Python string as C string. If the parameter is a unicode + * object, it is encoded to UTF-8. The caller must free the returned + * memory. + */ +static char *PyString_strdupUTF8(PyObject *string) { + char *buffer = NULL; + Py_ssize_t len = -1; + char *ret = NULL; + PyObject *realstring = string; + Py_INCREF(realstring); + + if (PyUnicode_Check(realstring)) { + PyObject *encoded = PyUnicode_AsUTF8String(realstring); + if (encoded) { + Py_DECREF(realstring); + realstring = encoded; + } else { + handle_pyerr(); + } + } + + if (PyString_AsStringAndSize(realstring, &buffer, &len) == -1) { + handle_pyerr(); + buffer = ""; + len = 0; + } + + if (buffer) { + ret = (char *)malloc((len+1)*sizeof(char)); + if (ret) + memcpy(ret, buffer, len+1); + } + + Py_DECREF(realstring); + + return ret; +} + +/********************************************************************** + * + * Public functions + */ + +int webvi_global_init() { + if (main_state) + return 0; + + // Python modules in lib-dynload/*.so do not correctly depend on + // libpython*.so. We need to dlopen the library here, otherwise + // importing webvi dies with "undefined symbol: + // PyExc_ValueError". See http://bugs.python.org/issue4434 + dlopen(PYTHONSHAREDLIB, RTLD_LAZY | RTLD_GLOBAL); + + Py_InitializeEx(0); + PyEval_InitThreads(); + main_state = PyThreadState_Get(); + PyEval_ReleaseLock(); /* release GIL acquired by PyEval_InitThreads */ + + return 0; +} + +void webvi_cleanup(int cleanup_python) { + /* Should we kill active interpreters first? */ + + if (cleanup_python != 0) { + PyEval_AcquireLock(); + PyThreadState_Swap(main_state); + Py_Finalize(); + } +} + +WebviCtx webvi_initialize_context(void) { + per_interpreter_data *ctx = (per_interpreter_data *)malloc(sizeof(per_interpreter_data)); + if (!ctx) + goto err; + + PyEval_AcquireLock(); + + ctx->interp = NULL; + ctx->last_error = NULL; + ctx->latest_message.msg = 0; + ctx->latest_message.handle = -1; + ctx->latest_message.status_code = -1; + ctx->latest_message.data = (char *)malloc(MAX_MSG_STRING_LENGTH*sizeof(char)); + if (!ctx->latest_message.data) + goto err; + + ctx->interp = Py_NewInterpreter(); + if (!ctx->interp) { + debug("Py_NewInterpreter failed\n"); + goto err; + } + + PyThreadState_Swap(ctx->interp); + + ctx->webvi_module = PyImport_ImportModule("webvi.api"); + if (!ctx->webvi_module) { + debug("import webvi.api failed\n"); + handle_pyerr(); + goto err; + } + + /* These are used to wrap C-callbacks into Python callables. + Keep in sync with libwebvi.h. */ + if (PyRun_SimpleString("from ctypes import CFUNCTYPE, c_int, c_size_t, c_char_p, c_void_p\n" + "WriteCallback = CFUNCTYPE(c_size_t, c_char_p, c_size_t, c_void_p)\n" + "ReadCallback = CFUNCTYPE(c_size_t, c_char_p, c_size_t, c_void_p)\n") != 0) { + debug("callback definitions failed\n"); + goto err; + } + + PyEval_ReleaseThread(ctx->interp); + + return (WebviCtx)ctx; + +err: + if (ctx) { + if (ctx->interp) { + Py_EndInterpreter(ctx->interp); + PyThreadState_Swap(NULL); + } + + PyEval_ReleaseLock(); + + if (ctx->latest_message.data) + free(ctx->latest_message.data); + free(ctx); + } + + return 0; +} + +void webvi_cleanup_context(WebviCtx ctx) { + if (ctx == 0) + return; + + per_interpreter_data *c = (per_interpreter_data *)ctx; + + PyThreadState_Swap(c->interp); + + /* FIXME: explicitly terminate all active handles? */ + + Py_DECREF(c->webvi_module); + c->webvi_module = NULL; + + Py_EndInterpreter(c->interp); + c->interp = NULL; + + PyThreadState_Swap(NULL); + + free(c); +} + +const char* webvi_version(void) { + return VERSION; +} + +const char* webvi_strerror(WebviCtx ctx, WebviResult res) { + char *errmsg; + + per_interpreter_data *c = (per_interpreter_data *)ctx; + + if (!c->last_error) { + /* We are going to leak c->last_error */ + c->last_error = (char *)malloc(MAX_ERROR_MESSAGE_LENGTH*sizeof(char)); + if (!c->last_error) + return NULL; + } + + PyEval_AcquireThread(c->interp); + + PyObject *args = Py_BuildValue("(i)", res); + PyObject *msg = call_python(c->webvi_module, "strerror", args); + Py_DECREF(args); + + if (msg) { + errmsg = PyString_AsString(msg); + if (!errmsg) { + handle_pyerr(); + errmsg = "Internal error"; + } + + strncpy(c->last_error, errmsg, MAX_ERROR_MESSAGE_LENGTH-1); + c->last_error[MAX_ERROR_MESSAGE_LENGTH] = '\0'; + + Py_DECREF(msg); + } else { + handle_pyerr(); + } + + PyEval_ReleaseThread(c->interp); + + return c->last_error; +} + +WebviResult webvi_set_config(WebviCtx ctx, WebviConfig conf, const char *value) { + WebviResult res; + per_interpreter_data *c = (per_interpreter_data *)ctx; + + PyEval_AcquireThread(c->interp); + + PyObject *args = Py_BuildValue("(is)", conf, value); + PyObject *v = call_python(c->webvi_module, "set_config", args); + Py_DECREF(args); + + res = pyint_as_webviresult(v); + Py_XDECREF(v); + + PyEval_ReleaseThread(c->interp); + + return res; +} + +WebviHandle webvi_new_request(WebviCtx ctx, const char *webvireference, WebviRequestType type) { + WebviHandle res = -1; + per_interpreter_data *c = (per_interpreter_data *)ctx; + + PyEval_AcquireThread(c->interp); + + PyObject *args = Py_BuildValue("(si)", webvireference, type); + PyObject *v = call_python(c->webvi_module, "new_request", args); + Py_DECREF(args); + + res = pyint_as_webviresult(v); + Py_XDECREF(v); + + PyEval_ReleaseThread(c->interp); + + return res; +} + +WebviResult webvi_start_handle(WebviCtx ctx, WebviHandle h) { + WebviResult res; + per_interpreter_data *c = (per_interpreter_data *)ctx; + + PyEval_AcquireThread(c->interp); + + PyObject *args = Py_BuildValue("(i)", h); + PyObject *v = call_python(c->webvi_module, "start_handle", args); + Py_DECREF(args); + + res = pyint_as_webviresult(v); + Py_XDECREF(v); + + PyEval_ReleaseThread(c->interp); + + return res; +} + +WebviResult webvi_stop_handle(WebviCtx ctx, WebviHandle h) { + WebviResult res = WEBVIERR_UNKNOWN_ERROR; + per_interpreter_data *c = (per_interpreter_data *)ctx; + + PyEval_AcquireThread(c->interp); + + PyObject *args = Py_BuildValue("(i)", h); + PyObject *v = call_python(c->webvi_module, "stop_handle", args); + Py_DECREF(args); + + res = pyint_as_webviresult(v); + Py_XDECREF(v); + + PyEval_ReleaseThread(c->interp); + + return res; +} + +WebviResult webvi_delete_handle(WebviCtx ctx, WebviHandle h) { + WebviResult res; + per_interpreter_data *c = (per_interpreter_data *)ctx; + + PyEval_AcquireThread(c->interp); + + PyObject *args = Py_BuildValue("(i)", h); + PyObject *v = call_python(c->webvi_module, "delete_handle", args); + Py_DECREF(args); + + res = pyint_as_webviresult(v); + Py_XDECREF(v); + + PyEval_ReleaseThread(c->interp); + + return res; +} + +WebviResult webvi_set_opt(WebviCtx ctx, WebviHandle h, WebviOption opt, ...) { + va_list argptr; + WebviResult res = WEBVIERR_UNKNOWN_ERROR; + per_interpreter_data *c = (per_interpreter_data *)ctx; + + PyEval_AcquireThread(c->interp); + + PyObject *m = PyImport_AddModule("__main__"); + if (!m) { + handle_pyerr(); + PyEval_ReleaseThread(c->interp); + return res; + } + + PyObject *maindict = PyModule_GetDict(m); + + va_start(argptr, opt); + + switch (opt) { + case WEBVIOPT_WRITEFUNC: + { + webvi_callback writerptr = va_arg(argptr, webvi_callback); + PyObject *write_prototype = PyDict_GetItemString(maindict, "WriteCallback"); + if (write_prototype) + res = set_callback(c->webvi_module, h, WEBVIOPT_WRITEFUNC, + writerptr, write_prototype); + break; + } + + case WEBVIOPT_WRITEDATA: + { + void *data = va_arg(argptr, void *); + PyObject *args = Py_BuildValue("(iil)", h, WEBVIOPT_WRITEDATA, (long)data); + PyObject *v = call_python(c->webvi_module, "set_opt", args); + Py_DECREF(args); + + res = pyint_as_webviresult(v); + Py_XDECREF(v); + + break; + } + + case WEBVIOPT_READFUNC: + { + webvi_callback readerptr = va_arg(argptr, webvi_callback); + PyObject *read_prototype = PyDict_GetItemString(maindict, "ReadCallback"); + if (read_prototype) + res = set_callback(c->webvi_module, h, WEBVIOPT_READFUNC, + readerptr, read_prototype); + break; + } + + case WEBVIOPT_READDATA: + { + void *data = va_arg(argptr, void *); + PyObject *args = Py_BuildValue("(iil)", h, WEBVIOPT_READDATA, (long)data); + PyObject *v = call_python(c->webvi_module, "set_opt", args); + Py_DECREF(args); + + res = pyint_as_webviresult(v); + Py_XDECREF(v); + + break; + } + + default: + res = WEBVIERR_INVALID_PARAMETER; + break; + } + + va_end(argptr); + + PyEval_ReleaseThread(c->interp); + + return res; +} + +WebviResult webvi_get_info(WebviCtx ctx, WebviHandle h, WebviInfo info, ...) { + va_list argptr; + WebviResult res = WEBVIERR_UNKNOWN_ERROR; + per_interpreter_data *c = (per_interpreter_data *)ctx; + + PyEval_AcquireThread(c->interp); + + va_start(argptr, info); + + switch (info) { + case WEBVIINFO_URL: + { + char **dest = va_arg(argptr, char **); + PyObject *args = Py_BuildValue("(ii)", h, info); + PyObject *v = call_python(c->webvi_module, "get_info", args); + Py_DECREF(args); + + *dest = NULL; + + if (v) { + if (PySequence_Check(v) && (PySequence_Length(v) >= 2)) { + PyObject *retval = PySequence_GetItem(v, 0); + PyObject *val = PySequence_GetItem(v, 1); + + if (PyInt_Check(retval) && + (PyString_Check(val) || PyUnicode_Check(val))) { + *dest = PyString_strdupUTF8(val); + res = PyInt_AsLong(retval); + } + + Py_DECREF(val); + Py_DECREF(retval); + } + + Py_DECREF(v); + } else { + handle_pyerr(); + } + + break; + } + + case WEBVIINFO_CONTENT_LENGTH: + { + long *dest = va_arg(argptr, long *); + PyObject *args = Py_BuildValue("(ii)", h, info); + PyObject *v = call_python(c->webvi_module, "get_info", args); + Py_DECREF(args); + + *dest = -1; + + if (v) { + if (PySequence_Check(v) && (PySequence_Length(v) >= 2)) { + PyObject *retval = PySequence_GetItem(v, 0); + PyObject *val = PySequence_GetItem(v, 1); + + if (PyInt_Check(retval) && PyInt_Check(val)) { + *dest = PyInt_AsLong(val); + res = PyInt_AsLong(retval); + } + + Py_DECREF(val); + Py_DECREF(retval); + } + + Py_DECREF(v); + } else { + handle_pyerr(); + } + + break; + } + + case WEBVIINFO_CONTENT_TYPE: + { + char **dest = va_arg(argptr, char **); + PyObject *args = Py_BuildValue("(ii)", h, info); + PyObject *v = call_python(c->webvi_module, "get_info", args); + Py_DECREF(args); + + *dest = NULL; + + if (v) { + if (PySequence_Check(v) && (PySequence_Length(v) >= 2)) { + PyObject *retval = PySequence_GetItem(v, 0); + PyObject *val = PySequence_GetItem(v, 1); + + if (PyInt_Check(retval) && + (PyString_Check(val) || PyUnicode_Check(val))) { + *dest = PyString_strdupUTF8(val); + res = PyInt_AsLong(retval); + } + + Py_DECREF(val); + Py_DECREF(retval); + } + + Py_DECREF(v); + } else { + handle_pyerr(); + } + + break; + } + + case WEBVIINFO_STREAM_TITLE: + { + char **dest = va_arg(argptr, char **); + PyObject *args = Py_BuildValue("(ii)", h, info); + PyObject *v = call_python(c->webvi_module, "get_info", args); + Py_DECREF(args); + + *dest = NULL; + + if (v) { + if (PySequence_Check(v) && (PySequence_Length(v) >= 2)) { + PyObject *retval = PySequence_GetItem(v, 0); + PyObject *val = PySequence_GetItem(v, 1); + + if (PyInt_Check(retval) && + (PyString_Check(val) || PyUnicode_Check(val))) { + *dest = PyString_strdupUTF8(val); + res = PyInt_AsLong(retval); + } + + Py_DECREF(val); + Py_DECREF(retval); + } + + Py_DECREF(v); + } else { + handle_pyerr(); + } + + break; + } + + default: + res = WEBVIERR_INVALID_PARAMETER; + break; + } + + va_end(argptr); + + PyEval_ReleaseThread(c->interp); + + return res; +} + +WebviResult webvi_fdset(WebviCtx ctx, + fd_set *read_fd_set, + fd_set *write_fd_set, + fd_set *exc_fd_set, + int *max_fd) +{ + WebviResult res = WEBVIERR_UNKNOWN_ERROR; + per_interpreter_data *c = (per_interpreter_data *)ctx; + + PyEval_AcquireThread(c->interp); + + PyObject *v = call_python(c->webvi_module, "fdset", NULL); + + if (v && PySequence_Check(v) && (PySequence_Length(v) == 5)) { + PyObject *retval = PySequence_GetItem(v, 0); + PyObject *readfd = PySequence_GetItem(v, 1); + PyObject *writefd = PySequence_GetItem(v, 2); + PyObject *excfd = PySequence_GetItem(v, 3); + PyObject *maxfd = PySequence_GetItem(v, 4); + PyObject *fd; + int i; + + if (readfd && PySequence_Check(readfd)) { + for (i=0; i<PySequence_Length(readfd); i++) { + fd = PySequence_GetItem(readfd, i); + if (fd && PyInt_Check(fd)) + FD_SET(PyInt_AsLong(fd), read_fd_set); + else + handle_pyerr(); + + Py_XDECREF(fd); + } + } + + if (writefd && PySequence_Check(writefd)) { + for (i=0; i<PySequence_Length(writefd); i++) { + fd = PySequence_GetItem(writefd, i); + if (fd && PyInt_Check(fd)) + FD_SET(PyInt_AsLong(fd), write_fd_set); + else + handle_pyerr(); + + Py_XDECREF(fd); + } + } + + if (excfd && PySequence_Check(excfd)) { + for (i=0; i<PySequence_Length(excfd); i++) { + fd = PySequence_GetItem(excfd, i); + if (fd && PyInt_Check(fd)) + FD_SET(PyInt_AsLong(fd), exc_fd_set); + else + handle_pyerr(); + + Py_XDECREF(fd); + } + } + + if (maxfd && PyInt_Check(maxfd)) + *max_fd = PyInt_AsLong(maxfd); + else + handle_pyerr(); + + if (retval && PyInt_Check(retval)) + res = PyInt_AsLong(retval); + else + handle_pyerr(); + + Py_XDECREF(maxfd); + Py_XDECREF(excfd); + Py_XDECREF(writefd); + Py_XDECREF(readfd); + Py_XDECREF(retval); + } else { + handle_pyerr(); + } + + PyEval_ReleaseThread(c->interp); + + return res; +} + +WebviResult webvi_perform(WebviCtx ctx, int fd, int ev_bitmask, long *running_handles) { + WebviResult res = WEBVIERR_UNKNOWN_ERROR; + per_interpreter_data *c = (per_interpreter_data *)ctx; + + PyEval_AcquireThread(c->interp); + + PyObject *args = Py_BuildValue("(ii)", fd, ev_bitmask); + PyObject *v = call_python(c->webvi_module, "perform", args); + Py_DECREF(args); + + if (v && (PySequence_Check(v) == 1) && (PySequence_Size(v) == 2)) { + PyObject *retval = PySequence_GetItem(v, 0); + PyObject *numhandles = PySequence_GetItem(v, 1); + + if (PyInt_Check(numhandles)) + *running_handles = PyInt_AsLong(numhandles); + if (PyInt_Check(retval)) + res = PyInt_AsLong(retval); + + Py_DECREF(numhandles); + Py_DECREF(retval); + } else { + handle_pyerr(); + } + + Py_XDECREF(v); + + PyEval_ReleaseThread(c->interp); + + return res; +} + +WebviMsg *webvi_get_message(WebviCtx ctx, int *remaining_messages) { + per_interpreter_data *c = (per_interpreter_data *)ctx; + + WebviMsg *msg = NULL; + + PyEval_AcquireThread(c->interp); + + PyObject *v = call_python(c->webvi_module, "pop_message", NULL); + + if (v) { + if ((PySequence_Check(v) == 1) && (PySequence_Length(v) == 4)) { + msg = &(c->latest_message); + msg->msg = WEBVIMSG_DONE; + msg->handle = -1; + msg->status_code = -1; + msg->data[0] = '\0'; + + PyObject *handle = PySequence_GetItem(v, 0); + if (handle && PyInt_Check(handle)) + msg->handle = (WebviHandle)PyInt_AsLong(handle); + Py_XDECREF(handle); + + PyObject *status = PySequence_GetItem(v, 1); + if (status && PyInt_Check(status)) + msg->status_code = (int)PyInt_AsLong(status); + Py_XDECREF(status); + + PyObject *errmsg = PySequence_GetItem(v, 2); + if (errmsg && + (PyString_Check(errmsg) || PyUnicode_Check(errmsg))) { + char *cstr = PyString_strdupUTF8(errmsg); + if (cstr) { + strncpy(msg->data, cstr, MAX_MSG_STRING_LENGTH); + msg->data[MAX_MSG_STRING_LENGTH-1] = '\0'; + + free(cstr); + } + } + Py_XDECREF(errmsg); + + PyObject *remaining = PySequence_GetItem(v, 3); + if (remaining && PyInt_Check(remaining)) + *remaining_messages = (int)PyInt_AsLong(remaining); + else + *remaining_messages = 0; + Py_XDECREF(remaining); + } + + if (msg->handle == -1) + msg = NULL; + + Py_DECREF(v); + } else { + handle_pyerr(); + } + + PyEval_ReleaseThread(c->interp); + + return msg; +} diff --git a/src/libwebvi/libwebvi.h b/src/libwebvi/libwebvi.h new file mode 100644 index 0000000..dd7ff39 --- /dev/null +++ b/src/libwebvi/libwebvi.h @@ -0,0 +1,330 @@ +/* + * libwebvi.h: C bindings for webvi Python module + * + * Copyright (c) 2010 Antti Ajanki <antti.ajanki@iki.fi> + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef __LIBWEBVI_H +#define __LIBWEBVI_H + +#include <sys/select.h> +#include <stdlib.h> + +typedef int WebviHandle; + +typedef ssize_t (*webvi_callback)(const char *, size_t, void *); + +typedef enum { + WEBVIMSG_DONE +} WebviMsgType; + +typedef struct { + WebviMsgType msg; + WebviHandle handle; + int status_code; + char *data; +} WebviMsg; + +typedef enum { + WEBVIREQ_MENU, + WEBVIREQ_FILE, + WEBVIREQ_STREAMURL +} WebviRequestType; + +typedef enum { + WEBVIERR_UNKNOWN_ERROR = -1, + WEBVIERR_OK = 0, + WEBVIERR_INVALID_HANDLE, + WEBVIERR_INVALID_PARAMETER +} WebviResult; + +typedef enum { + WEBVIOPT_WRITEFUNC, + WEBVIOPT_READFUNC, + WEBVIOPT_WRITEDATA, + WEBVIOPT_READDATA, +} WebviOption; + +typedef enum { + WEBVIINFO_URL, + WEBVIINFO_CONTENT_LENGTH, + WEBVIINFO_CONTENT_TYPE, + WEBVIINFO_STREAM_TITLE +} WebviInfo; + +typedef enum { + WEBVI_SELECT_TIMEOUT = 0, + WEBVI_SELECT_READ = 1, + WEBVI_SELECT_WRITE = 2, + WEBVI_SELECT_EXCEPTION = 4 +} WebviSelectBitmask; + +typedef enum { + WEBVI_CONFIG_TEMPLATE_PATH +} WebviConfig; + +typedef long WebviCtx; + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Initialize the library. Must be called before any other functions + * (the only exception is webvi_version() which can be called before + * the library is initialized). + * + * Returns 0, if initialization succeeds. + */ +int webvi_global_init(void); + +/* + * Frees all resources currently used by libwebvi and terminates all + * active connections. Do not call any libwebvi function after this. + * If the cleanup_python equals 0, the Python library is deinitialized + * by calling Py_Finalize(), otherwise the Python library is left + * loaded to be used by other modules of the program. + */ +void webvi_cleanup(int cleanup_python); + +/* + * Create a new context. A valid context is required for calling other + * functions in the library. The created contextes are independent of + * each other. The context must be destroyed by a call to + * webvi_cleanup_context when no longer needed. + * + * Return value 0 indicates an error. + */ +WebviCtx webvi_initialize_context(void); + +/* + * Free resources allocated by context ctx. The context can not be + * used anymore after a call to this function. + */ +void webvi_cleanup_context(WebviCtx ctx); + +/* + * Return the version of libwebvi as a string. The returned value + * points to a status buffer, and the caller should modify or not free() it. + */ +const char* webvi_version(void); + +/* + * Return a string describing an error code. The returned value points + * to a status buffer, and the caller should not modify or free() it. + */ +const char* webvi_strerror(WebviCtx ctx, WebviResult err); + +/* + * Set a new value for a global configuration option conf. + * + * Currently the only legal value for conf is TEMPLATE_PATH, which + * sets the base directory for the XSLT templates. + * + * The string pointed by value is copied to the library. + */ +WebviResult webvi_set_config(WebviCtx ctx, WebviConfig conf, const char *value); + +/* + * Creates a new download request. + * + * webvireference is a wvt:// URI of the resource that should be + * downloaded. type should be WEBVIREQ_MENU, if the resource should be + * transformed into a XML menu (that is if webvireferece comes from + * <ref> tag), WEBVIREQ_FILE, if the resource points to a media stream + * (from <stream> tag) whose contents should be downloaded, or + * WEBVIREQ_STREAMURL, if the resource is points to a media stream + * whose real URL should be resolved. + * + * Typically, the reference has been acquired from a previously + * downloaded menu. A special constant "wvt:///?srcurl=mainmenu" with + * type WEBVIREQ_MENU can be used to download mainmenu. + * + * The return value is a handle to the newly created request. Value -1 + * indicates an error. + * + * The request is initialized but the actual network transfer is not + * started. You can set up additional configuration options on the + * handle using webvi_set_opt() before starting the handle with + * webvi_start_handle(). + */ +WebviHandle webvi_new_request(WebviCtx ctx, const char *wvtreference, WebviRequestType type); + +/* + * Starts the transfer on handle h. The transfer one or more sockets + * whose file descriptors are returned by webvi_fdset(). The actual + * transfer is done during webvi_perform() calls. + */ +WebviResult webvi_start_handle(WebviCtx ctx, WebviHandle h); + +/* + * Requests that the transfer on handle h shoud be aborted. After the + * library has actually finished aborting the transfer, the handle h + * is returned by webvi_get_message() with non-zero status code. + */ +WebviResult webvi_stop_handle(WebviCtx ctx, WebviHandle h); + +/* + * Frees resources associated with handle h. The handle can not be + * used after this call. If the handle is still in the middle of a + * transfer, the transfer is forcefully aborted. + */ +WebviResult webvi_delete_handle(WebviCtx ctx, WebviHandle h); + +/* + * Sets configuration options that changes behaviour of the handle. + * opt is one of the values of WebviOption enum as indicated below. + * The fourth parameter sets the value of the specified option. Its + * type depends on opt as discussed below. + * + * Possible values for opt: + * + * WEBVIOPT_WRITEFUNC + * + * Set the callback function that shall be called when data is read + * from the network. The fourth parameter is a pointer to the callback + * funtion + * + * ssize_t (*webvi_callback)(const char *, size_t, void *). + * + * When the function is called, the first parameter is a pointer to + * the incoming data, the second parameters is the size of the + * incoming data block in bytes, and the third parameter is a pointer + * to user's data structure can be set by WEBVIOPT_WRITEDATA option. + * + * The callback funtion should return the number of bytes is + * processed. If this differs from the size of the incoming data + * block, it indicates that an error occurred and the transfer will be + * aborted. + * + * If write callback has not been set (or if it is set to NULL) the + * incoming data is printed to stdout. + * + * WEBVIOPT_WRITEDATA + * + * Sets the value that will be passed to the write callback. The + * fourth parameter is of type void *. + * + * WEBVIOPT_READFUNC + * + * Set the callback function that shall be called when data is to be + * send to network. The fourth parameter is a pointer to the callback + * funtion + * + * ssize_t (*webvi_callback)(const char *, size_t, void *) + * + * The first parameter is a pointer to a buffer where the data that is + * to be sent should be written. The second parameter is the maximum + * size of the buffer. The thirs parameter is a pointer to user data + * set with WEBVIOPT_READDATA. + * + * The return value should be the number of bytes actually written to + * the buffer. If the return value is -1, the transfer is aborted. + * + * WEBVIOPT_READDATA + * + * Sets the value that will be passed to the read callback. The + * fourth parameter is of type void *. + * + */ +WebviResult webvi_set_opt(WebviCtx ctx, WebviHandle h, WebviOption opt, ...); + +/* + * Get information specific to a WebviHandle. The value will be + * written to the memory location pointed by the third argument. The + * type of the pointer depends in the second parameter as discused + * below. + * + * Available information: + * + * WEBVIINFO_URL + * + * Receive URL. The third parameter must be a pointer to char *. The + * caller must free() the memory. + * + * WEBVIINFO_CONTENT_LENGTH + * + * Receive the value of Content-length field, or -1 if the size is + * unknown. The third parameter must be a pointer to long. + * + * WEBVIINFO_CONTENT_TYPE + * + * Receive the Content-type string. The returned value is NULL, if the + * Content-type is unknown. The third parameter must be a pointer to + * char *. The caller must free() the memory. + * + * WEBVIINFO_STREAM_TITLE + * + * Receive stream title. The returned value is NULL, if title is + * unknown. The third parameter must be a pointer to char *. The + * caller must free() the memory. + * + */ +WebviResult webvi_get_info(WebviCtx ctx, WebviHandle h, WebviInfo info, ...); + +/* + * Get active file descriptors in use by the library. The file + * descriptors that should be waited for reading, writing or + * exceptions are returned in read_fd_set, write_fd_set and + * exc_fd_set, respectively. The fd_sets are not cleared, but the new + * file descriptors are added to them. max_fd will contain the highest + * numbered file descriptor that was returned in one of the fd_sets. + * + * One should wait for action in one of the file descriptors returned + * by this function using select(), poll() or similar system call, + * and, after seeing action on a file descriptor, call webvi_perform + * on that descriptor. + */ +WebviResult webvi_fdset(WebviCtx ctx, fd_set *readfd, fd_set *writefd, fd_set *excfd, int *max_fd); + +/* + * Perform input or output action on a file descriptor. + * + * activefd is a file descriptor that was returned by an earlier call + * to webvi_fdset and has been signalled to be ready by select() or + * similar funtion. ev_bitmask should be OR'ed combination of + * WEBVI_SELECT_READ, WEBVI_SELECT_WRITE, WEBVI_SELECT_EXCEPTION to + * indicate that activefd has been signalled to be ready for reading, + * writing or being in exception state, respectively. ev_bitmask can + * also set to WEBVI_SELECT_TIMEOUT which means that the state is + * checked internally. On return, running_handles will contain the + * number of still active file descriptors. + * + * This function should be called with activefd set to 0 and + * ev_bitmask to WEBVI_SELECT_TIMEOUT periodically (every few seconds) + * even if no file descriptors have become ready to allow for timeout + * handling and other internal tasks. + */ +WebviResult webvi_perform(WebviCtx ctx, int sockfd, int ev_bitmask, long *running_handles); + +/* + * Return the next message from the message queue. Currently the only + * message, WEBVIMSG_DONE, indicates that a transfer on a handle has + * finished. The number of messages remaining in the queue after this + * call is written to remaining_messages. The pointers in the returned + * WebviMsg point to handle's internal buffers and is valid until the + * next call to webvi_get_message(). The caller should free the + * returned WebviMsg. The return value is NULL if there is no messages + * in the queue. + */ +WebviMsg *webvi_get_message(WebviCtx ctx, int *remaining_messages); + +#ifdef __cplusplus +} +#endif + + +#endif diff --git a/src/libwebvi/pythonlibname.py b/src/libwebvi/pythonlibname.py new file mode 100755 index 0000000..48f4b97 --- /dev/null +++ b/src/libwebvi/pythonlibname.py @@ -0,0 +1,14 @@ +#!/usr/bin/python + +import distutils.sysconfig +import os +import os.path + +libdir = distutils.sysconfig.get_config_var('LIBDIR') +ldlibrary = distutils.sysconfig.get_config_var('LDLIBRARY') + +libfile = os.readlink(os.path.join(libdir, ldlibrary)) +if not os.path.isabs(libfile): + libfile = os.path.join(libdir, libfile) + +print libfile diff --git a/src/libwebvi/webvi/__init__.py b/src/libwebvi/webvi/__init__.py new file mode 100644 index 0000000..b6d50d5 --- /dev/null +++ b/src/libwebvi/webvi/__init__.py @@ -0,0 +1 @@ +__all__ = ['api', 'asyncurl', 'constants', 'download', 'request', 'utils'] diff --git a/src/libwebvi/webvi/api.py b/src/libwebvi/webvi/api.py new file mode 100644 index 0000000..2fb24ab --- /dev/null +++ b/src/libwebvi/webvi/api.py @@ -0,0 +1,289 @@ +# api.py - webvi API +# +# Copyright (c) 2009, 2010 Antti Ajanki <antti.ajanki@iki.fi> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +"""webvi API + +Example workflow: + +1) Create a new request. ref is a wvt:// URI. + +handle = new_request(ref, WebviRequestType.MENU) + +2) Setup a callback function: + +setopt(handle, WebviOpt.WRITEFUNC, my_callback) + +3) Start the network transfer: + +start_handle(handle) + +4) Get active file descriptors, wait for activity on them, and let +webvi process the file descriptor. + +import select + +... + +readfd, writefd, excfd = fdset()[1:4] +readfd, writefd, excfd = select.select(readfd, writefd, excfd, 5.0) +for fd in readfd: + perform(fd, WebviSelectBitmask.READ) +for fd in writefd: + perform(fd, WebviSelectBitmask.WRITE) + +5) Iterate 4) until pop_message returns handle, which indicates that +the request has been completed. + +finished, status, errmsg, remaining = pop_message() +if finished == handle: + print 'done' +""" + +import request +import asyncore +import asyncurl +from constants import WebviErr, WebviOpt, WebviInfo, WebviSelectBitmask, WebviConfig + +# Human readable messages for WebviErr items +error_messages = { + WebviErr.OK: 'Succeeded', + WebviErr.INVALID_HANDLE: 'Invalid handle', + WebviErr.INVALID_PARAMETER: "Invalid parameter", + WebviErr.INTERNAL_ERROR: "Internal error" + } + +# Module-level variables +finished_queue = [] +request_list = request.RequestList() +socket_map = asyncore.socket_map + +# Internal functions + +class MyRequest(request.Request): + def request_done(self, err, errmsg): + """Calls the inherited function and puts the handle of the + finished request to the finished_queue.""" + finished_queue.append(self) + request.Request.request_done(self, err, errmsg) + +# Public functions + +def strerror(err): + """Return human readable error message for conststants.WebviErr""" + try: + return error_messages[err] + except KeyError: + return error_messages[WebviErr.INTERNAL_ERROR] + +def set_config(conf, value): + """Set a new value for a global configuration option conf. + + Currently the only legal value for conf is + constants.WebviConfig.TEMPLATE_PATH, which sets the base directory + for the XSLT templates. + """ + if conf == WebviConfig.TEMPLATE_PATH: + request.set_template_path(value) + return WebviErr.OK + else: + return WebviErr.INVALID_PARAMETER + +def new_request(reference, reqtype): + """Create a new request. + + reference is a wvt:// URI which typically comes from previously + opened menu. reqtype is one of conststants.WebviRequestType and + indicates wheter the reference is a navigation menu, stream that + should be downloaded, or a stream whose URL should be returned. + + Returns a handle (an integer) will be given to following + functions. Return value -1 indicates an error. + """ + req = MyRequest(reference, reqtype) + + if req.srcurl is None: + return -1 + + return request_list.put(req) + +def set_opt(handle, option, value): + """Set configuration options on a handle. + + option specifies option's name (one of constants.WebviOpt values) + and value is the new value for the option. + """ + + try: + req = request_list[handle] + except KeyError: + return WebviErr.INVALID_HANDLE + + if option == WebviOpt.WRITEFUNC: + req.writefunc = value + elif option == WebviOpt.WRITEDATA: + req.writedata = value + elif option == WebviOpt.READFUNC: + req.readfunc = value + elif option == WebviOpt.READDATA: + req.readdata = value + else: + return WebviErr.INVALID_PARAMETER + + return WebviErr.OK + +def get_info(handle, info): + """Get information about a handle. + + info is the type of data that is to be returned (one of + constants.WebviInfo values). + """ + try: + req = request_list[handle] + except KeyError: + return (WebviErr.INVALID_HANDLE, None) + + val = None + if info == WebviInfo.URL: + if req.dl is not None: + val = req.dl.get_url() + else: + val = req.srcurl + elif info == WebviInfo.CONTENT_LENGTH: + val = req.contentlength + elif info == WebviInfo.CONTENT_TYPE: + val = req.contenttype + elif info == WebviInfo.STREAM_TITLE: + val = req.streamtitle + else: + return (WebviErr.INVALID_PARAMETER, None) + + return (WebviErr.OK, val) + +def start_handle(handle): + """Start the network transfer on a handle.""" + try: + req = request_list[handle] + except KeyError: + return WebviErr.INVALID_HANDLE + + req.start() + return WebviErr.OK + +def stop_handle(handle): + """Aborts network transfer on a handle. + + The abort is confirmed by pop_message() returning the handle with + an non-zero error code. + """ + try: + req = request_list[handle] + except KeyError: + return WebviErr.INVALID_HANDLE + + if not req.is_finished(): + req.stop() + + return WebviErr.OK + +def delete_handle(handle): + """Frees resources related to handle. + + This should be called when the transfer has been completed and the + user is done with the handle. If the transfer is still in progress + when delete_handle() is called, the transfer is aborted. After + calling delete_handle() the handle value will be invalid, and + should not be feed to other functions anymore. + """ + try: + del request_list[handle] + except KeyError: + return WebviErr.INVALID_HANDLE + + return WebviErr.OK + +def pop_message(): + """Retrieve messages about finished requests. + + If a request has been finished since the last call to this + function, returns a tuple (handle, status, msg, num_messages), + where handle identifies the finished request, status is a numeric + status code (non-zero for an error), msg is a description of an + error as string, and num_messages is the number of messages that + can be retrieved by calling pop_messages() again immediately. If + the finished requests queue is empty, returns (-1, -1, "", 0). + """ + if finished_queue: + req = finished_queue.pop() + return (req.handle, req.status, req.errmsg, len(finished_queue)) + else: + return (-1, -1, "", 0) + +def fdset(): + """Get the list of file descriptors that are currently in use by + the library. + + Returrns a tuple, where the first item is a constants.WebviErr + value indicating the success of the call, the next three values + are lists of descriptors that should be monitored for reading, + writing, and exceptional conditions, respectively. The last item + is the maximum of the file descriptors in the three lists. + """ + readfd = [] + writefd = [] + excfd = [] + maxfd = -1 + + for fd, disp in socket_map.iteritems(): + if disp.readable(): + readfd.append(fd) + if fd > maxfd: + maxfd = fd + if disp.writable(): + writefd.append(fd) + if fd > maxfd: + maxfd = fd + + return (WebviErr.OK, readfd, writefd, excfd, maxfd) + +def perform(fd, ev_bitmask): + """Perform transfer on file descriptor fd. + + fd is a file descriptor that has been signalled to be ready by + select() or similar system call. ev_bitmask specifies what kind of + activity has been detected using values of + constants.WebviSelectBitmask. If ev_bitmask is + constants.WebviSelectBitmask.TIMEOUT the type of activity is check + by the function. + + This function should be called every few seconds with fd=-1, + ev_bitmask=constants.WebviSelectBitmask.TIMEOUT even if no + activity has been signalled on the file descriptors to ensure + correct handling of timeouts and other internal processing. + """ + if fd < 0: + asyncurl.poll() + else: + disp = socket_map.get(fd) + if disp is not None: + if ev_bitmask & WebviSelectBitmask.READ != 0 or \ + (ev_bitmask == 0 and disp.readable()): + disp.handle_read_event() + if ev_bitmask & WebviSelectBitmask.WRITE != 0 or \ + (ev_bitmask == 0 and disp.writable()): + disp.handle_write_event() + + return (WebviErr.OK, len(socket_map)) diff --git a/src/libwebvi/webvi/asyncurl.py b/src/libwebvi/webvi/asyncurl.py new file mode 100644 index 0000000..afc575a --- /dev/null +++ b/src/libwebvi/webvi/asyncurl.py @@ -0,0 +1,389 @@ +# asyncurl.py - Wrapper class for using pycurl objects in asyncore +# +# Copyright (c) 2009, 2010 Antti Ajanki <antti.ajanki@iki.fi> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +"""This is a wrapper for using pycurl objects in asyncore. + +Start a transfer by creating an async_curl_dispatch, and call +asyncurl.loop() instead of asyncore.loop(). +""" + +import asyncore +import pycurl +import traceback +import os +import select +import time +import cStringIO +from errno import EINTR + +SOCKET_TIMEOUT = pycurl.SOCKET_TIMEOUT +CSELECT_IN = pycurl.CSELECT_IN +CSELECT_OUT = pycurl.CSELECT_OUT +CSELECT_ERR = pycurl.CSELECT_ERR + +def poll(timeout=0.0, map=None, mdisp=None): + if map is None: + map = asyncore.socket_map + if mdisp is None: + mdisp = multi_dispatcher + if map: + timeout = min(timeout, mdisp.timeout/1000.0) + + r = []; w = []; e = [] + for fd, obj in map.items(): + is_r = obj.readable() + is_w = obj.writable() + if is_r: + r.append(fd) + if is_w: + w.append(fd) + if is_r or is_w: + e.append(fd) + if [] == r == w == e: + time.sleep(timeout) + else: + try: + r, w, e = select.select(r, w, e, timeout) + except select.error, err: + if err[0] != EINTR: + raise + else: + return + + if [] == r == w == e: + mdisp.socket_action(SOCKET_TIMEOUT, 0) + return + + for fd in r: + obj = map.get(fd) + if obj is None: + continue + asyncore.read(obj) + + for fd in w: + obj = map.get(fd) + if obj is None: + continue + asyncore.write(obj) + + for fd in e: + obj = map.get(fd) + if obj is None: + continue + asyncore._exception(obj) + +def loop(timeout=30.0, use_poll=False, map=None, count=None, mdisp=None): + if map is None: + map = asyncore.socket_map + if mdisp is None: + mdisp = multi_dispatcher + + if use_poll and hasattr(select, 'poll'): + print 'poll2 not implemented' + poll_fun = poll + + if count is None: + while map: + poll_fun(timeout, map, mdisp) + + else: + while map and count > 0: + poll_fun(timeout, map, mdisp) + count = count - 1 + +def noop_callback(s): + pass + + +class curl_multi_dispatcher: + """A dispatcher for pycurl.CurlMulti() objects. An instance of + this class is created automatically. There is usually no need to + construct one manually.""" + def __init__(self, socket_map=None): + if socket_map is None: + self._map = asyncore.socket_map + else: + self._map = socket_map + self.dispatchers = {} + self.timeout = 1000 + self._sockets_removed = False + self._curlm = pycurl.CurlMulti() + self._curlm.setopt(pycurl.M_SOCKETFUNCTION, self.socket_callback) + self._curlm.setopt(pycurl.M_TIMERFUNCTION, self.timeout_callback) + + def socket_callback(self, action, socket, user_data, socket_data): +# print 'socket callback: %d, %s' % \ +# (socket, {pycurl.POLL_NONE: "NONE", +# pycurl.POLL_IN: "IN", +# pycurl.POLL_OUT: "OUT", +# pycurl.POLL_INOUT: "INOUT", +# pycurl.POLL_REMOVE: "REMOVE"}[action]) + + if action == pycurl.POLL_NONE: + return + elif action == pycurl.POLL_REMOVE: + if socket in self._map: + del self._map[socket] + self._sockets_removed = True + return + + obj = self._map.get(socket) + if obj is None: + obj = dispatcher_wrapper(socket, self) + self._map[socket] = obj + + if action == pycurl.POLL_IN: + obj.set_readable(True) + obj.set_writable(False) + elif action == pycurl.POLL_OUT: + obj.set_readable(False) + obj.set_writable(True) + elif action == pycurl.POLL_INOUT: + obj.set_readable(True) + obj.set_writable(True) + + def timeout_callback(self, msec): + self.timeout = msec + + def attach(self, curldisp): + """Starts a transfer on curl handle by attaching it to this + multihandle.""" + self.dispatchers[curldisp.curl] = curldisp + try: + self._curlm.add_handle(curldisp.curl) + except pycurl.error: + # the curl object is already on this multi-stack + pass + + while self._curlm.socket_all()[0] == pycurl.E_CALL_MULTI_PERFORM: + pass + + self.check_completed(True) + + def detach(self, curldisp): + """Removes curl handle from this multihandle, and fire its + completion callback function.""" + self.del_curl(curldisp.curl) + + # libcurl does not send POLL_REMOVE when a handle is aborted + for socket, curlobj in self._map.items(): + if curlobj == curldisp: + + print 'handle stopped but socket in map' + + del self._map[socket] + break + + def del_curl(self, curl): + try: + self._curlm.remove_handle(curl) + except pycurl.error: + # the curl object is not on this multi-stack + pass + if curl in self.dispatchers: + del self.dispatchers[curl] + curl.close() + + def socket_action(self, fd, evbitmask): + res = -1 + OK = False + while not OK: + try: + res = self._curlm.socket_action(fd, evbitmask) + OK = True + except pycurl.error: + # Older libcurls may return CURLM_CALL_MULTI_PERFORM, + # which pycurl (at least 7.19.0) converts to an + # exception. If that happens, call socket_action + # again. + pass + return res + + def check_completed(self, force): + if not force and not self._sockets_removed: + return + self._sockets_removed = False + + nmsg, success, failed = self._curlm.info_read() + for handle in success: + disp = self.dispatchers.get(handle) + if disp is not None: + try: + disp.handle_completed(0, None) + except: + self.handle_error() + self.del_curl(handle) + for handle, err, errmsg in failed: + disp = self.dispatchers.get(handle) + if disp is not None: + try: + disp.handle_completed(err, errmsg) + except: + self.handle_error() + self.del_curl(handle) + + def handle_error(self): + print 'Exception occurred in multicurl processing' + print traceback.format_exc() + + +class dispatcher_wrapper: + """An internal helper class that connects a file descriptor in the + asyncore.socket_map to a curl_multi_dispatcher.""" + def __init__(self, fd, multicurl): + self.fd = fd + self.multicurl = multicurl + self.read_flag = False + self.write_flag = False + + def readable(self): + return self.read_flag + + def writable(self): + return self.write_flag + + def set_readable(self, x): + self.read_flag = x + + def set_writable(self, x): + self.write_flag = x + + def handle_read_event(self): + self.multicurl.socket_action(self.fd, CSELECT_IN) + self.multicurl.check_completed(False) + + def handle_write_event(self): + self.multicurl.socket_action(self.fd, CSELECT_OUT) + self.multicurl.check_completed(False) + + def handle_expt_event(self): + self.multicurl.socket_action(self.fd, CSELECT_ERR) + self.multicurl.check_completed(False) + + def handle_error(self): + print 'Exception occurred during processing of a curl request' + print traceback.format_exc() + + +class async_curl_dispatcher: + """A dispatcher class for pycurl transfers.""" + def __init__(self, url, auto_start=True): + """Initializes a pycurl object self.curl. The default is to + download url to an internal buffer whose content can be read + with self.recv(). If auto_start is False, the transfer is not + started before a call to add_channel(). + """ + self.url = url + self.socket = None + self.buffer = cStringIO.StringIO() + self.curl = pycurl.Curl() + self.curl.setopt(pycurl.URL, self.url) + self.curl.setopt(pycurl.FOLLOWLOCATION, 1) + self.curl.setopt(pycurl.AUTOREFERER, 1) + self.curl.setopt(pycurl.MAXREDIRS, 10) + self.curl.setopt(pycurl.FAILONERROR, 1) + self.curl.setopt(pycurl.WRITEFUNCTION, self.write_to_buf) + if auto_start: + self.add_channel() + + def write_to_buf(self, msg): + self.buffer.write(msg) + self.handle_read() + + def send(self, data): + raise NotImplementedError + + def recv(self, buffer_size): + # buffer_size is ignored + ret = self.buffer.getvalue() + self.buffer.reset() + self.buffer.truncate() + return ret + + def add_channel(self, multidisp=None): + if multidisp is None: + multidisp = multi_dispatcher + multidisp.attach(self) + + def del_channel(self, multidisp=None): + if multidisp is None: + multidisp = multi_dispatcher + multidisp.detach(self) + + def close(self): + self.del_channel() + + def log_info(self, message, type='info'): + if type != 'info': + print '%s: %s' % (type, message) + + def handle_error(self): + print 'Exception occurred during processing of a curl request' + print traceback.format_exc() + self.close() + + def handle_read(self): + self.log_info('unhandled read event', 'warning') + + def handle_write(self): + self.log_info('unhandled write event', 'warning') + + def handle_completed(self, err, errmsg): + """Called when the download has finished. err is a numeric + error code (or 0 if the download was successfull) and errmsg + is a curl error message as a string.""" + # It seems that a reference to self.write_to_buf forbids + # garbage collection from deleting this object. unsetopt() or + # setting the callback to None are not allowed. Is there a + # better way? + self.curl.setopt(pycurl.WRITEFUNCTION, noop_callback) + self.close() + + +def test(): + + class curl_request(async_curl_dispatcher): + def __init__(self, url, outfile, i): + async_curl_dispatcher.__init__(self, url, False) + self.id = i + self.outfile = outfile + self.add_channel() + + def handle_read(self): + buf = self.recv(4096) + print '%s: writing %d bytes' % (self.id, len(buf)) + self.outfile.write(buf) + + def handle_completed(self, err, errmsg): + if err != 0: + print '%s: error: %d %s' % (self.id, err, errmsg) + else: + print '%s: completed' % self.id + + curl_request('http://www.python.org', open('python.out', 'w'), 1) + curl_request('http://en.wikipedia.org/wiki/Main_Page', open('wikipedia.out', 'w'), 2) + loop(timeout=5.0) + + +pycurl.global_init(pycurl.GLOBAL_DEFAULT) +try: + multi_dispatcher +except NameError: + multi_dispatcher = curl_multi_dispatcher() + +if __name__ == '__main__': + test() diff --git a/src/libwebvi/webvi/constants.py b/src/libwebvi/webvi/constants.py new file mode 100644 index 0000000..2797178 --- /dev/null +++ b/src/libwebvi/webvi/constants.py @@ -0,0 +1,50 @@ +# constants.py - Python definitions for constants in libwebvi.h +# +# Copyright (c) 2009, 2010 Antti Ajanki <antti.ajanki@iki.fi> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +# Keep these in sync with libwebvi.h + +class WebviRequestType: + MENU = 0 + FILE = 1 + STREAMURL = 2 + +class WebviErr: + OK = 0 + INVALID_HANDLE = 1 + INVALID_PARAMETER = 2 + INTERNAL_ERROR = -1 + +class WebviOpt: + WRITEFUNC = 0 + READFUNC = 1 + WRITEDATA = 2 + READDATA = 3 + +class WebviInfo: + URL = 0 + CONTENT_LENGTH = 1 + CONTENT_TYPE = 2 + STREAM_TITLE = 3 + +class WebviSelectBitmask: + TIMEOUT = 0 + READ = 1 + WRITE = 2 + EXCEPTION = 4 + +class WebviConfig: + TEMPLATE_PATH = 0 diff --git a/src/libwebvi/webvi/download.py b/src/libwebvi/webvi/download.py new file mode 100644 index 0000000..34240ff --- /dev/null +++ b/src/libwebvi/webvi/download.py @@ -0,0 +1,470 @@ +# download.py - webvi downloader backend +# +# Copyright (c) 2009, 2010 Antti Ajanki <antti.ajanki@iki.fi> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +import os +import asyncore +import asynchat +import cStringIO +import urllib +import subprocess +import socket +import signal +import pycurl +import asyncurl +import utils +import version + +WEBVID_USER_AGENT = 'libwebvi/%s %s' % (version.VERSION, pycurl.version) +MOZILLA_USER_AGENT = 'Mozilla/5.0 (X11; U; Linux i686 (x86_64); en-US; rv:1.9.1.5) Gecko/20091102 Firefox/3.5.5' + +try: + from libmimms import libmms +except ImportError, e: + pass + +# Mapping from curl error codes to webvi errors. The error constants +# are defined only in pycurl 7.16.1 and newer. +if pycurl.version_info()[2] >= 0x071001: + CURL_ERROR_CODE_MAPPING = \ + {pycurl.E_OK: 0, + pycurl.E_OPERATION_TIMEOUTED: 408, + pycurl.E_OUT_OF_MEMORY: 500, + pycurl.E_PARTIAL_FILE: 504, + pycurl.E_READ_ERROR: 504, + pycurl.E_RECV_ERROR: 504, + pycurl.E_REMOTE_FILE_NOT_FOUND: 404, + pycurl.E_TOO_MANY_REDIRECTS: 404, + pycurl.E_UNSUPPORTED_PROTOCOL: 500, + pycurl.E_URL_MALFORMAT: 400, + pycurl.E_COULDNT_CONNECT: 506, + pycurl.E_COULDNT_RESOLVE_HOST: 506, + pycurl.E_COULDNT_RESOLVE_PROXY: 506, + pycurl.E_FILE_COULDNT_READ_FILE: 404, + pycurl.E_GOT_NOTHING: 504, + pycurl.E_HTTP_RETURNED_ERROR: 404, + pycurl.E_INTERFACE_FAILED: 506, + pycurl.E_LOGIN_DENIED: 403} +else: + CURL_ERROR_CODE_MAPPING = {pycurl.E_OK: 0} + +class DownloaderException(Exception): + def __init__(self, errcode, errmsg): + self.code = errcode + self.msg = errmsg + + def __str__(self): + return '%s %s' % (self.code, self.msg) + +def create_downloader(url, templatedir, writefunc=None, headerfunc=None, + donefunc=None, HTTPheaders=None, headers_only=False): + """Downloader factory. + + Returns a suitable downloader object according to url type. Raises + DownloaderException if creating the downloader fails. + """ + if url == '': + return DummyDownloader('', writefunc, headerfunc, donefunc, + headers_only) + + elif url.startswith('mms://') or url.startswith('mmsh://'): + try: + libmms + except (NameError, OSError): + raise DownloaderException(501, 'MMS scheme not supported. Install mimms.') + return MMSDownload(url, writefunc, headerfunc, donefunc, + headers_only) + + elif url.startswith('wvt://'): + executable, parameters = parse_external_downloader_wvt_uri(url, templatedir) + if executable is None: + raise DownloaderException(400, 'Invalid wvt:// URL') + try: + return ExternalDownloader(executable, parameters, writefunc, + headerfunc, donefunc, headers_only) + except OSError, (errno, strerror): + raise DownloaderException(500, 'Failed to execute %s: %s' % + (executable, strerror)) + + else: + return CurlDownload(url, writefunc, headerfunc, donefunc, + HTTPheaders, headers_only) + +def convert_curl_error(err, errmsg, aborted): + """Convert a curl error code err to webvi error code.""" + if err == pycurl.E_WRITE_ERROR: + return (402, 'Aborted') + elif err not in CURL_ERROR_CODE_MAPPING: + return (500, errmsg) + else: + return (CURL_ERROR_CODE_MAPPING[err], errmsg) + +def parse_external_downloader_wvt_uri(url, templatedir): + exe = None + params = [] + if not url.startswith('wvt:///bin/'): + return (exe, params) + + split = url[len('wvt:///bin/'):].split('?', 1) + exe = templatedir + '/bin/' + split[0] + + if len(split) > 1: + params = [urllib.unquote(x) for x in split[1].split('&')] + + return (exe, params) + +def _new_process_group(): + os.setpgid(0, 0) + +class DownloaderBase: + """Base class for downloaders.""" + def __init__(self, url): + self.url = url + + def start(self): + """Should start the download process.""" + pass + + def abort(self): + """Signals that the download should be aborted.""" + pass + + def get_url(self): + """Return the URL where the data was downloaded.""" + return self.url + + def get_body(self): + return '' + + def get_encoding(self): + """Return the encoding of the downloaded object, or None if + encoding is not known.""" + return None + + +class DummyDownloader(DownloaderBase, asyncore.file_dispatcher): + """This class doesn't actually download anything, but returns msg + string as if it had been result of a download operation. + """ + def __init__(self, msg, writefunc=None, headerfunc=None, + donefunc=None, headers_only=False): + DownloaderBase.__init__(self, '') + self.donefunc = donefunc + self.writefunc = writefunc + self.headers_only = headers_only + + readfd, writefd = os.pipe() + asyncore.file_dispatcher.__init__(self, readfd) + os.write(writefd, msg) + os.close(writefd) + + def set_file(self, fd): + # Like asyncore.file_dispatcher.set_file() but doesn't call + # add_channel(). We'll call add_channel() in start() when the + # download shall begin. + self.socket = asyncore.file_wrapper(fd) + self._fileno = self.socket.fileno() + + def start(self): + if self.headers_only: + self.donefunc(0, None) + else: + self.add_channel() + + def readable(self): + return True + + def writable(self): + return False + + def handle_read(self): + try: + data = self.recv(4096) + if data and self.writefunc is not None: + self.writefunc(data) + except socket.error: + self.handle_error() + + def handle_close(self): + self.close() + + if self.donefunc is not None: + self.donefunc(0, '') + + +class CurlDownload(DownloaderBase, asyncurl.async_curl_dispatcher): + """Downloads a large number of different URL schemes using + libcurl.""" + def __init__(self, url, writefunc=None, headerfunc=None, + donefunc=None, HTTPheaders=None, headers_only=False): + DownloaderBase.__init__(self, url) + asyncurl.async_curl_dispatcher.__init__(self, url, False) + self.donefunc = donefunc + self.writefunc = writefunc + self.contenttype = None + self.running = True + self.aborted = False + + self.curl.setopt(pycurl.USERAGENT, WEBVID_USER_AGENT) + if headers_only: + self.curl.setopt(pycurl.NOBODY, 1) + if headerfunc is not None: + self.curl.setopt(pycurl.HEADERFUNCTION, headerfunc) + self.curl.setopt(pycurl.WRITEFUNCTION, self.writewrapper) + + headers = [] + if HTTPheaders is not None: + for headername, headerdata in HTTPheaders.iteritems(): + if headername == 'cookie': + self.curl.setopt(pycurl.COOKIE, headerdata) + else: + headers.append(headername + ': ' + headerdata) + + self.curl.setopt(pycurl.HTTPHEADER, headers) + + def start(self): + self.add_channel() + + def close(self): + self.contenttype = self.curl.getinfo(pycurl.CONTENT_TYPE) + asyncurl.async_curl_dispatcher.close(self) + self.running = False + + def abort(self): + self.aborted = True + + def writewrapper(self, data): + if self.aborted: + return 0 + + if self.writefunc is None: + return self.write_to_buf(data) + else: + return self.writefunc(data) + + def get_body(self): + return self.buffer.getvalue() + + def get_encoding(self): + if self.running: + self.contenttype = self.curl.getinfo(pycurl.CONTENT_TYPE) + + if self.contenttype is None: + return None + + values = self.contenttype.split(';', 1) + if len(values) > 1: + for par in values[1].split(' '): + if par.startswith('charset='): + return par[len('charset='):].strip('"') + + return None + + def handle_read(self): + # Do nothing to the read data here. Instead, let the base + # class to collect the data to self.buffer. + pass + + def handle_completed(self, err, errmsg): + asyncurl.async_curl_dispatcher.handle_completed(self, err, errmsg) + if self.donefunc is not None: + err, errmsg = convert_curl_error(err, errmsg, self.aborted) + self.donefunc(err, errmsg) + + +class MMSDownload(DownloaderBase, asyncore.file_dispatcher): + def __init__(self, url, writefunc=None, headerfunc=None, + donefunc=None, headers_only=False): + DownloaderBase.__init__(self, url) + self.r, self.w = os.pipe() + asyncore.file_dispatcher.__init__(self, self.r) + + self.writefunc = writefunc + self.headerfunc = headerfunc + self.donefunc = donefunc + self.relaylen = -1 + self.expectedlen = -1 + self.headers_only = headers_only + self.stream = None + self.errmsg = None + self.aborted = False + + def set_file(self, fd): + self.socket = asyncore.file_wrapper(fd) + self._fileno = self.socket.fileno() + + def recv(self, buffer_size): + data = self.stream.read() + if not data: + self.handle_close() + return '' + else: + return data + + def close(self): + if self.stream is not None: + self.stream.close() + + os.close(self.w) + asyncore.file_dispatcher.close(self) + + def readable(self): + return self.stream is not None + + def writable(self): + return False + + def start(self): + try: + self.stream = libmms.Stream(self.url, 1000000) + except libmms.Error, e: + self.errmsg = e.message + self.handle_close() + return + + os.write(self.w, '0') # signal that this dispatcher has data available + + if self.headerfunc: + # Output the length in a HTTP-like header field so that we + # can use the same callbacks as with HTTP downloads. + ext = utils.get_url_extension(self.url) + if ext == 'wma': + self.headerfunc('Content-Type: audio/x-ms-wma') + else: # if ext == 'wmv': + self.headerfunc('Content-Type: video/x-ms-wmv') + self.headerfunc('Content-Length: %d' % self.stream.length()) + + if self.headers_only: + self.handle_close() + else: + self.add_channel() + + def abort(self): + self.aborted = True + + def handle_read(self): + if self.aborted: + self.handle_close() + return '' + + try: + data = self.recv(4096) + if data and (self.writefunc is not None): + self.writefunc(data) + except libmms.Error, e: + self.errmsg = e.message + self.handle_close() + return + + def handle_close(self): + self.close() + self.stream = None + + if self.errmsg is not None: + self.donefunc(500, self.errmsg) + elif self.aborted: + self.donefunc(402, 'Aborted') + elif self.relaylen < self.expectedlen: + # We got fewer bytes than expected. Maybe the connection + # was lost? + self.donefunc(504, 'Download may be incomplete (length %d < %d)' % + (self.relaylen, self.expectedlen)) + else: + self.donefunc(0, '') + + +class ExternalDownloader(DownloaderBase, asyncore.file_dispatcher): + """Executes an external process and reads its result on standard + output.""" + def __init__(self, executable, parameters, writefunc=None, + headerfunc=None, donefunc=None, headers_only=False): + DownloaderBase.__init__(self, '') + asyncore.dispatcher.__init__(self, None, None) + self.executable = executable + self.writefunc = writefunc + self.headerfunc = headerfunc + self.donefunc = donefunc + self.headers_only = headers_only + self.contenttype = '' + self.aborted = False + + args = [] + for par in parameters: + try: + key, val = par.split('=', 1) + if key == 'contenttype': + self.contenttype = val + elif key == 'arg': + args.append(val) + except ValueError: + pass + + if args: + self.url = args[0] + else: + self.url = executable + self.cmd = [executable] + args + + self.process = None + + def start(self): + self.headerfunc('Content-Type: ' + self.contenttype) + + if self.headers_only: + self.donefunc(0, None) + return + + self.process = subprocess.Popen(self.cmd, stdout=subprocess.PIPE, + close_fds=True, + preexec_fn=_new_process_group) + asyncore.file_dispatcher.__init__(self, os.dup(self.process.stdout.fileno())) + + def abort(self): + if self.process is not None: + self.aborted = True + pg = os.getpgid(self.process.pid) + os.killpg(pg, signal.SIGTERM) + + def readable(self): + # Return True if the subprocess is still alive + return self.process is not None and self.process.returncode is None + + def writable(self): + return False + + def handle_read(self): + try: + data = self.recv(4096) + if data and self.writefunc is not None: + self.writefunc(data) + except socket.error: + self.handle_error() + return + + def handle_close(self): + self.close() + self.process.wait() + + if self.donefunc is not None: + if self.process.returncode == 0: + self.donefunc(0, '') + elif self.aborted and self.process.returncode == -signal.SIGTERM: + self.donefunc(402, 'Aborted') + else: + self.donefunc(500, 'Child process "%s" returned error %s' % \ + (' '.join(self.cmd), str(self.process.returncode))) + + self.process = None diff --git a/src/libwebvi/webvi/json2xml.py b/src/libwebvi/webvi/json2xml.py new file mode 100644 index 0000000..372e6c6 --- /dev/null +++ b/src/libwebvi/webvi/json2xml.py @@ -0,0 +1,69 @@ +import sys +import libxml2 + +try: + import json +except ImportError: + try: + import simplejson as json + except ImportError: + print 'Error: install simplejson' + raise + +def _serialize_to_xml(obj, xmlnode): + """Create XML representation of a Python object (list, tuple, + dist, or basic number and string types).""" + if type(obj) in (list, tuple): + listnode = libxml2.newNode('list') + for li in obj: + itemnode = libxml2.newNode('li') + _serialize_to_xml(li, itemnode) + listnode.addChild(itemnode) + xmlnode.addChild(listnode) + + elif type(obj) == dict: + dictnode = libxml2.newNode('dict') + for key, val in obj.iteritems(): + itemnode = libxml2.newNode(key.encode('utf-8')) + _serialize_to_xml(val, itemnode) + dictnode.addChild(itemnode) + xmlnode.addChild(dictnode) + + elif type(obj) in (str, unicode, int, long, float, complex, bool): + content = libxml2.newText(unicode(obj).encode('utf-8')) + xmlnode.addChild(content) + + elif type(obj) == type(None): + pass + + else: + raise TypeError('Unsupported type %s while serializing to xml' + % type(obj)) + +def json2xml(jsonstr, encoding=None): + """Convert JSON string jsonstr to XML tree.""" + try: + parsed = json.loads(jsonstr, encoding) + except ValueError: + return None + + xmldoc = libxml2.newDoc("1.0") + root = libxml2.newNode("jsondocument") + xmldoc.setRootElement(root) + + _serialize_to_xml(parsed, root) + + return xmldoc + +def test(): + xml = json2xml(open(sys.argv[1]).read()) + + if xml is None: + return + + print xml.serialize('utf-8') + + xml.freeDoc() + +if __name__ == '__main__': + test() diff --git a/src/libwebvi/webvi/request.py b/src/libwebvi/webvi/request.py new file mode 100644 index 0000000..e19eb9c --- /dev/null +++ b/src/libwebvi/webvi/request.py @@ -0,0 +1,617 @@ +# request.py - webvi request class +# +# Copyright (c) 2009, 2010 Antti Ajanki <antti.ajanki@iki.fi> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +import urllib +import libxml2 +import os.path +import cStringIO +import re +import download +import sys +import utils +import json2xml +from constants import WebviRequestType + +DEBUG = False + +DEFAULT_TEMPLATE_PATH = '/usr/local/share/webvi/templates' +template_path = DEFAULT_TEMPLATE_PATH + +def debug(msg): + if DEBUG: + if type(msg) == unicode: + sys.stderr.write(msg.encode('ascii', 'replace')) + else: + sys.stderr.write(msg) + sys.stderr.write('\n') + +def set_template_path(path): + global template_path + + if path is None: + template_path = os.path.realpath(DEFAULT_TEMPLATE_PATH) + else: + template_path = os.path.realpath(path) + + debug("set_template_path " + template_path) + +def parse_reference(reference): + """Parses URLs of the following form: + + wvt:///youtube/video.xsl?srcurl=http%3A%2F%2Fwww.youtube.com%2F¶m=name1,value1¶m=name2,value2 + + reference is assumed to be URL-encoded UTF-8 string. + + Returns (template, srcurl, params, processing_instructions) where + template if the URL path name (the part before ?), srcurl is the + parameter called srcurl, and params is a dictionary of (name, + quoted-value) pairs extracted from param parameters. Parameter + values are quoted so that the xslt parser handles them as string. + processing_instructions is dictionary of options that affect the + further processing of the data. + """ + try: + reference = str(reference) + except UnicodeEncodeError: + return (None, None, None, None) + + if not reference.startswith('wvt:///'): + return (None, None, None, None) + + ref = reference[len('wvt:///'):] + + template = None + srcurl = '' + parameters = {} + substitutions = {} + refsettings = {'HTTP-headers': {}} + + fields = ref.split('?', 1) + template = fields[0] + if len(fields) == 1: + return (template, srcurl, parameters, refsettings) + + for par in fields[1].split('&'): + paramfields = par.split('=', 1) + key = paramfields[0] + + if len(paramfields) == 2: + value = urllib.unquote(paramfields[1]) + else: + value = '' + + if key.lower() == 'srcurl': + srcurl = value + + elif key.lower() == 'param': + fields2 = value.split(',', 1) + pname = fields2[0].lower() + if len(fields2) == 2: + pvalue = "'" + fields2[1] + "'" + else: + pvalue = "''" + parameters[pname] = pvalue + + elif key.lower() == 'subst': + substfields = value.split(',', 1) + if len(substfields) == 2: + substitutions[substfields[0]] = substfields[1] + + elif key.lower() == 'minquality': + try: + refsettings['minquality'] = int(value) + except ValueError: + pass + + elif key.lower() == 'maxquality': + try: + refsettings['maxquality'] = int(value) + except ValueError: + pass + + elif key.lower() == 'postprocess': + refsettings.setdefault('postprocess', []).append(value) + + elif key.lower() == 'contenttype': + refsettings['overridecontenttype'] = value + + elif key.lower() == 'http-header': + try: + headername, headerdata = value.split(',', 1) + except ValueError: + continue + refsettings['HTTP-headers'][headername] = headerdata + + if substitutions: + srcurl = brace_substitution(srcurl, substitutions) + + return (template, srcurl, parameters, refsettings) + +def brace_substitution(template, subs): + """Substitute subs[x] for '{x}' in template. Unescape {{ to { and + }} to }. Unescaping is not done in substitution keys, i.e. while + scanning for a closing brace after a single opening brace.""" + strbuf = cStringIO.StringIO() + + last_pos = 0 + for match in re.finditer(r'{{?|}}', template): + next_pos = match.start() + if next_pos < last_pos: + continue + + strbuf.write(template[last_pos:next_pos]) + if match.group(0) == '{{': + strbuf.write('{') + last_pos = next_pos+2 + + elif match.group(0) == '}}': + strbuf.write('}') + last_pos = next_pos+2 + + else: # match.group(0) == '{' + key_end = template.find('}', next_pos+1) + if key_end == -1: + strbuf.write(template[next_pos:]) + last_pos = len(template) + break + + try: + strbuf.write(urllib.quote(subs[template[next_pos+1:key_end]])) + except KeyError: + strbuf.write(template[next_pos:key_end+1]) + last_pos = key_end+1 + + strbuf.write(template[last_pos:]) + return strbuf.getvalue() + + +class Request: + DEFAULT_URL_PRIORITY = 50 + + def __init__(self, reference, reqtype): + self.handle = None + self.dl = None + + # state variables + self.xsltfile, self.srcurl, self.xsltparameters, self.processing = \ + parse_reference(reference) + self.type = reqtype + self.status = -1 + self.errmsg = None + self.mediaurls = [] + + # stream information + self.contenttype = 'text/xml' + self.contentlength = -1 + self.streamtitle = '' + + # callbacks + self.writefunc = None + self.writedata = None + self.readfunc = None + self.readdata = None + + def handle_header(self, buf): + namedata = buf.split(':', 1) + if len(namedata) == 2: + headername, headerdata = namedata + if headername.lower() == 'content-type': + # Strip parameters like charset="utf-8" + self.contenttype = headerdata.split(';', 1)[0].strip() + elif headername.lower() == 'content-length': + try: + self.contentlength = int(headerdata.strip()) + except ValueError: + self.contentlength = -1 + + def setup_downloader(self, url, writefunc, headerfunc, donefunc, + HTTPheaders=None, headers_only=False): + try: + self.dl = download.create_downloader(url, + template_path, + writefunc, + headerfunc, + donefunc, + HTTPheaders, + headers_only) + self.dl.start() + except download.DownloaderException, exc: + self.dl = None + if donefunc is not None: + donefunc(exc.code, exc.msg) + + def start(self): + debug('start %s\ntemplate = %s, type = %s\n' + 'parameters = %s, processing = %s' % + (self.srcurl, self.xsltfile, self.type, str(self.xsltparameters), + str(self.processing))) + + if self.type == WebviRequestType.MENU and self.srcurl == 'mainmenu': + self.send_mainmenu() + else: + self.setup_downloader(self.srcurl, None, + self.handle_header, + self.finished_apply_xslt, + self.processing['HTTP-headers']) + + def stop(self): + if self.dl is not None: + debug("aborting") + self.dl.abort() + + def start_download(self, url=None): + """Initialize a download. + + If url is None, pop the first URL out of self.mediaurls. If + URL is an ASX playlist, read the content URL from it and start + to download the actual content. + """ + while url is None or url == '': + try: + url = self.mediaurls.pop(0) + except IndexError: + self.request_done(406, 'No more URLs left') + + debug('Start_download ' + url) + + # reset stream status + self.contenttype = 'text/xml' + self.contentlength = -1 + + if self.is_asx_playlist(url): + self.setup_downloader(url, None, + self.handle_header, + self.finished_playlist_loaded, + self.processing['HTTP-headers']) + + else: + self.setup_downloader(url, self.writewrapper, + self.handle_header, + self.finished_download, + self.processing['HTTP-headers']) + + def check_and_send_url(self, url=None): + """Check if the target exists (currently only for HTTP URLs) + before relaying the URL to the client.""" + while url is None or url == '': + try: + url = self.mediaurls.pop(0) + except IndexError: + self.request_done(406, 'No more URLs left') + return + + debug('check_and_send_url ' + str(url)) + + if self.is_asx_playlist(url): + self.setup_downloader(url, None, self.handle_header, + self.finished_playlist_loaded, + self.processing['HTTP-headers']) + elif url.startswith('http://') or url.startswith('https://'): + self.checking_url = url + self.setup_downloader(url, None, None, + self.finished_check_url, + self.processing['HTTP-headers'], True) + else: + self.writewrapper(url) + self.request_done(0, None) + + def send_mainmenu(self): + """Build the XML main menu from the module description files + in the hard drive. + """ + if not os.path.isdir(template_path): + self.request_done(404, "Can't access service directory %s" % + template_path) + return + + debug('Reading XSLT templates from ' + template_path) + + # Find menu items in the service.xml files in the subdirectories + menuitems = {} + for f in os.listdir(template_path): + if f == 'bin': + continue + + filename = os.path.join(template_path, f, 'service.xml') + try: + doc = libxml2.parseFile(filename) + except libxml2.parserError: + debug("Failed to parse " + filename); + continue + + title = '' + url = '' + + root = doc.getRootElement() + if (root is None) or (root.name != 'service'): + debug("Root node is not 'service' in " + filename); + doc.freeDoc() + continue + node = root.children + while node is not None: + if node.name == 'title': + title = utils.get_content_unicode(node) + elif node.name == 'ref': + url = utils.get_content_unicode(node) + node = node.next + doc.freeDoc() + + if (title == '') or (url == ''): + debug("Empty <title> or <ref> in " + filename); + continue + + menuitems[title.lower()] = ('<link>\n' + '<label>%s</label>\n' + '<ref>%s</ref>\n' + '</link>\n' % + (libxml2.newText(title), + libxml2.newText(url))) + # Sort the menu items + titles = menuitems.keys() + titles.sort() + + # Build the menu + mainmenu = ('<?xml version="1.0"?>\n' + '<wvmenu>\n' + '<title>Select video source</title>\n') + for t in titles: + mainmenu += menuitems[t] + mainmenu += '</wvmenu>' + + self.dl = download.DummyDownloader(mainmenu, + writefunc=self.writewrapper, + donefunc=self.request_done) + self.dl.start() + + def writewrapper(self, inp): + """Wraps pycurl write callback (with the data as the only + parameter) into webvi write callback (with signature (data, + length, usertag)). If self.writefunc is not set, write to + stdout.""" + if self.writefunc is not None: + inplen = len(inp) + written = self.writefunc(inp, inplen, self.writedata) + if written != inplen: + self.dl.close() + self.request_done(405, 'Write callback failed') + else: + sys.stdout.write(inp) + + def is_asx_playlist(self, url): + if utils.get_url_extension(url).lower() == 'asx': + return True + else: + return False + + def get_url_from_asx(self, asx, asxurl): + """Simple ASX parser. Return the content of the first <ref> + tag.""" + try: + doc = libxml2.htmlReadDoc(asx, asxurl, None, + libxml2.HTML_PARSE_NOERROR | + libxml2.HTML_PARSE_NOWARNING | + libxml2.HTML_PARSE_NONET) + except libxml2.treeError: + debug('Can\'t parse ASX:\n' + asx) + return None + root = doc.getRootElement() + ret = self._get_ref_recursive(root).strip() + doc.freeDoc() + return ret + + def _get_ref_recursive(self, node): + if node is None: + return None + if node.name.lower() == 'ref': + href = node.prop('href') + if href is not None: + return href + child = node.children + while child: + res = self._get_ref_recursive(child) + if res is not None: + return res + child = child.next + return None + + def parse_mediaurl(self, xml, minpriority, maxpriority): + debug('parse_mediaurl\n' + xml) + + self.streamtitle = '???' + mediaurls = [] + + try: + doc = libxml2.parseDoc(xml) + except libxml2.parserError: + debug('Invalid XML') + return mediaurls + + root = doc.getRootElement() + if root is None: + debug('No root node') + return mediaurls + + urls_and_priorities = [] + node = root.children + while node: + if node.name == 'title': + self.streamtitle = utils.get_content_unicode(node) + elif node.name == 'url': + try: + priority = int(node.prop('priority')) + except (ValueError, TypeError): + priority = self.DEFAULT_URL_PRIORITY + + content = node.getContent() + if priority >= minpriority and priority <= maxpriority and content != '': + urls_and_priorities.append((priority, content)) + node = node.next + doc.freeDoc() + + urls_and_priorities.sort() + urls_and_priorities.reverse() + mediaurls = [b[1] for b in urls_and_priorities] + + return mediaurls + + def finished_download(self, err, errmsg): + if err == 0: + self.request_done(0, None) + elif err != 402 and self.mediaurls: + debug('Download failed (%s %s).\nTrying the next one.' % (err, errmsg)) + self.dl = None + self.start_download() + else: + self.request_done(err, errmsg) + + def finished_playlist_loaded(self, err, errmsg): + if err == 0: + url = self.get_url_from_asx(self.dl.get_body(), + self.dl.get_url()) + if url is None: + err = 404 + errmsg = 'No ref tag in ASX file' + else: + if not self.is_asx_playlist(url) and url.startswith('http:'): + # The protocol is really "Windows Media HTTP + # Streaming Protocol", not plain HTTP, even though + # the scheme in the ASX file says "http://". We + # can't do MS-WMSP but luckily most MS-WMSP + # servers support MMS, too. + url = 'mms:' + url[5:] + + if self.type == WebviRequestType.STREAMURL: + self.check_and_send_url(url) + else: + self.start_download(url) + + if err != 0: + if not self.mediaurls: + self.request_done(err, errmsg) + else: + if self.type == WebviRequestType.STREAMURL: + self.check_and_send_url() + else: + self.start_download() + + def finished_apply_xslt(self, err, errmsg): + if err != 0: + self.request_done(err, errmsg) + return + + url = self.srcurl + + # Add input documentURL to the parameters + params = self.xsltparameters.copy() + params['docurl'] = "'" + url + "'" + + minpriority = self.processing.get('minquality', 0) + maxpriority = self.processing.get('maxquality', 100) + + xsltpath = os.path.join(template_path, self.xsltfile) + + # Check that xsltpath is inside the template directory + if os.path.commonprefix([template_path, os.path.realpath(xsltpath)]) != template_path: + self.request_done(503, 'Insecure template path') + return + + xml = self.dl.get_body() + encoding = self.dl.get_encoding() + + if self.processing.has_key('postprocess') and \ + 'json2xml' in self.processing['postprocess']: + xmldoc = json2xml.json2xml(xml, encoding) + if xmldoc is None: + self.request_done(503, 'Invalid JSON content') + return + xml = xmldoc.serialize('utf-8') + encoding = 'utf-8' + + #debug(xml) + + resulttree = utils.apply_xslt(xml, encoding, url, + xsltpath, params) + if resulttree is None: + self.request_done(503, 'XSLT transformation failed') + return + + if self.type == WebviRequestType.MENU: + debug("result:") + debug(resulttree) + self.writewrapper(resulttree) + self.request_done(0, None) + elif self.type == WebviRequestType.STREAMURL: + self.mediaurls = self.parse_mediaurl(resulttree, minpriority, maxpriority) + if self.mediaurls: + self.check_and_send_url() + else: + self.request_done(406, 'No valid URLs found') + elif self.type == WebviRequestType.FILE: + self.mediaurls = self.parse_mediaurl(resulttree, minpriority, maxpriority) + if self.mediaurls: + self.start_download() + else: + self.request_done(406, 'No valid URLs found') + else: + self.request_done(0, None) + + def finished_extract_playlist_url(self, err, errmsg): + if err == 0: + url = self.get_url_from_asx(self.dl.get_body(), + self.dl.get_url()) + if url is not None: + if self.is_asx_playlist(url): + self.setup_downloader(url, None, None, + self.finished_extract_playlist_url, + self.processing['HTTP-headers']) + else: + if url.startswith('http:'): + url = 'mms:' + url[5:] + self.check_and_send_url(url) + else: + self.request_done(503, 'XSLT tranformation failed to produce URL') + else: + self.request_done(err, errmsg) + + + def finished_check_url(self, err, errmsg): + if err == 0: + self.writewrapper(self.checking_url) + self.request_done(0, None) + else: + self.check_and_send_url() + + def request_done(self, err, errmsg): + debug('request_done: %d %s' % (err, errmsg)) + + self.status = err + self.errmsg = errmsg + self.dl = None + + def is_finished(self): + return self.status >= 0 + + +class RequestList(dict): + nextreqnum = 1 + + def put(self, req): + reqnum = RequestList.nextreqnum + RequestList.nextreqnum += 1 + req.handle = reqnum + self[reqnum] = req + return reqnum diff --git a/src/libwebvi/webvi/utils.py b/src/libwebvi/webvi/utils.py new file mode 100644 index 0000000..cefe09a --- /dev/null +++ b/src/libwebvi/webvi/utils.py @@ -0,0 +1,134 @@ +# utils.py - misc. utility functions +# +# Copyright (c) 2009, 2010 Antti Ajanki <antti.ajanki@iki.fi> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +import urlparse +import re +import libxml2 +import libxslt +import urllib + +def get_url_extension(url): + """Extracts and returns the file extension from a URL.""" + # The extension is located right before possible query + # ("?query=foo") or fragment ("#bar"). + try: + i = url.index('?') + url = url[:i] + except ValueError: + pass + # The extension is the part after the last '.' that does not + # contain '/'. + idot = url.rfind('.') + islash = url.rfind('/') + if idot > islash: + return url[idot+1:] + else: + return '' + +def urljoin_query_fix(base, url, allow_fragments=True): + """urlparse.urljoin in Python 2.5 (2.6?) and older is broken in + case url is a pure query. See http://bugs.python.org/issue1432. + This handles correctly the case where base is a full (http) url + and url is a query, and calls urljoin() for other cases.""" + if url.startswith('?'): + bscheme, bnetloc, bpath, bparams, bquery, bfragment = \ + urlparse.urlparse(base, '', allow_fragments) + bquery = url[1:] + return urlparse.urlunparse((bscheme, bnetloc, bpath, + bparams, bquery, bfragment)) + else: + return urlparse.urljoin(base, url, allow_fragments) + +def get_content_unicode(node): + """node.getContent() returns an UTF-8 encoded sequence of bytes (a + string). Convert it to a unicode object.""" + return unicode(node.getContent(), 'UTF-8', 'replace') + +def apply_xslt(buf, encoding, url, xsltfile, params=None): + """Apply xslt transform from file xsltfile to the string buf + with parameters params. url is the location of buf. Returns + the transformed file as a string, or None if the + transformation couldn't be completed.""" + stylesheet = libxslt.parseStylesheetFile(xsltfile) + + if stylesheet is None: + #self.log_info('Can\'t open stylesheet %s' % xsltfile, 'warning') + return None + try: + # htmlReadDoc fails if the buffer is empty but succeeds + # (returning an empty tree) if the buffer is a single + # space. + if buf == '': + buf = ' ' + + # Guess whether this is an XML or HTML document. + if buf.startswith('<?xml'): + doc = libxml2.readDoc(buf, url, None, + libxml2.XML_PARSE_NOERROR | + libxml2.XML_PARSE_NOWARNING | + libxml2.XML_PARSE_NONET) + else: + #self.log_info('Using HTML parser', 'debug') + doc = libxml2.htmlReadDoc(buf, url, encoding, + libxml2.HTML_PARSE_NOERROR | + libxml2.HTML_PARSE_NOWARNING | + libxml2.HTML_PARSE_NONET) + except libxml2.treeError: + stylesheet.freeStylesheet() + #self.log_info('Can\'t parse XML document', 'warning') + return None + resultdoc = stylesheet.applyStylesheet(doc, params) + stylesheet.freeStylesheet() + doc.freeDoc() + if resultdoc is None: + #self.log_info('Can\'t apply stylesheet', 'warning') + return None + + # Postprocess the document: + # Resolve relative URLs in srcurl (TODO: this should be done in XSLT) + root = resultdoc.getRootElement() + if root is None: + resultdoc.freeDoc() + return None + + node2 = root.children + while node2 is not None: + if node2.name not in ['link', 'button']: + node2 = node2.next + continue + + node = node2.children + while node is not None: + if (node.name == 'ref') or (node.name == 'stream') or \ + (node.name == 'submission'): + refurl = node.getContent() + + match = re.search(r'\?.*srcurl=([^&]*)', refurl) + if match is not None: + oldurl = urllib.unquote(match.group(1)) + absurl = urljoin_query_fix(url, oldurl) + newurl = refurl[:match.start(1)] + \ + urllib.quote(absurl) + \ + refurl[match.end(1):] + node.setContent(resultdoc.encodeSpecialChars(newurl)) + + node = node.next + node2 = node2.next + + ret = resultdoc.serialize('UTF-8') + resultdoc.freeDoc() + return ret diff --git a/src/libwebvi/webvi/version.py b/src/libwebvi/webvi/version.py new file mode 100644 index 0000000..26cb817 --- /dev/null +++ b/src/libwebvi/webvi/version.py @@ -0,0 +1,20 @@ +# version.py - webvi version +# +# Copyright (c) 2009, 2010 Antti Ajanki <antti.ajanki@iki.fi> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +MAJOR = '0' +MINOR = '2' +VERSION = MAJOR + '.' + MINOR |