From 2d4d55cfedccfa80d283592af349e93d0968f58e Mon Sep 17 00:00:00 2001 From: Antti Ajanki Date: Fri, 9 Aug 2013 16:28:16 +0300 Subject: Assume that incoming HTML is UTF-8 encoded until a proper fix is implemented --- src/libwebvi/linkextractor.c | 1 + src/libwebvi/request.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/src/libwebvi/linkextractor.c b/src/libwebvi/linkextractor.c index b0e4270..1691a46 100644 --- a/src/libwebvi/linkextractor.c +++ b/src/libwebvi/linkextractor.c @@ -63,6 +63,7 @@ GPtrArray *link_extractor_get_links(LinkExtractor *self) { tidyOptSetInt(tdoc, TidyWrapLen, 4096); tidyBufInit(&errbuf); tidySetErrorBuffer(tdoc, &errbuf); + tidySetInCharEncoding(tdoc, "utf8"); err = tidyParseBuffer(tdoc, &self->html_buffer); if (err >= 0) { diff --git a/src/libwebvi/request.c b/src/libwebvi/request.c index fc2b635..baca467 100644 --- a/src/libwebvi/request.c +++ b/src/libwebvi/request.c @@ -133,6 +133,8 @@ PipeComponent *build_and_start_menu_pipe(const WebviRequest *self) { pipe_component_set_next(p2, p3); pipe_downloader_start(p1); + // TODO: the downloaded HTML should be converted into UTF-8 + return (PipeComponent *)p1; } -- cgit v1.2.3