diff options
author | Antti Ajanki <antti.ajanki@iki.fi> | 2013-08-06 16:21:26 +0300 |
---|---|---|
committer | Antti Ajanki <antti.ajanki@iki.fi> | 2013-08-06 16:21:26 +0300 |
commit | 1ea55bb8190e782940ff893ac8d492acabbbc886 (patch) | |
tree | c7e986e1f26abf4590ee523979ced5aff2d1c2cb | |
parent | cdec30061f66f8fea2df8629ef892f9a32d4d23c (diff) | |
download | vdr-plugin-webvideo-1ea55bb8190e782940ff893ac8d492acabbbc886.tar.gz vdr-plugin-webvideo-1ea55bb8190e782940ff893ac8d492acabbbc886.tar.bz2 |
Show title attribute of <a> when present
-rw-r--r-- | src/libwebvi/linkextractor.c | 34 | ||||
-rw-r--r-- | tests/libwebvi_tests.c | 4 | ||||
-rw-r--r-- | tests/linkextractor_tests.c | 25 | ||||
-rw-r--r-- | tests/linkextractor_tests.h | 10 |
4 files changed, 54 insertions, 19 deletions
diff --git a/src/libwebvi/linkextractor.c b/src/libwebvi/linkextractor.c index d683df6..c11b926 100644 --- a/src/libwebvi/linkextractor.c +++ b/src/libwebvi/linkextractor.c @@ -25,7 +25,8 @@ static void get_links_recursively(TidyDoc tdoc, const LinkTemplates *link_templates, const gchar *baseurl, GPtrArray *links_found); -static void getTextContent(TidyDoc tdoc, TidyNode node, TidyBuffer* buf); +static gchar *parse_link_title(TidyDoc tdoc, TidyNode node); +static void get_text_content(TidyDoc tdoc, TidyNode node, TidyBuffer* buf); LinkExtractor *link_extractor_create(const LinkTemplates *link_templates, const gchar *baseurl) { LinkExtractor *extractor; @@ -96,17 +97,11 @@ void get_links_recursively(TidyDoc tdoc, TidyNode node, const LinkAction *action = \ link_templates_get_action(link_templates, absolute_href); if (action) { - TidyBuffer titlebuf; - tidyBufInit(&titlebuf); - getTextContent(tdoc, child, &titlebuf); - tidyBufPutByte(&titlebuf, '\0'); - gchar *title = g_strdup((const gchar*)titlebuf.bp); - g_strstrip(title); LinkActionType type = link_action_get_type(action); + gchar *title = parse_link_title(tdoc, child); Link *link = link_create(absolute_href, title, type); g_ptr_array_add(links_found, link); g_free(title); - tidyBufFree(&titlebuf); } g_free(absolute_href); } @@ -119,7 +114,26 @@ void get_links_recursively(TidyDoc tdoc, TidyNode node, } } -void getTextContent(TidyDoc tdoc, TidyNode node, TidyBuffer* buf) { +gchar *parse_link_title(TidyDoc tdoc, TidyNode node) { + gchar *title; + TidyAttr title_attr = tidyAttrGetById(node, TidyAttr_TITLE); + if (title_attr) { + ctmbstr tidy_title = tidyAttrValue(title_attr); + title = g_strdup(tidy_title); + } else { + TidyBuffer titlebuf; + tidyBufInit(&titlebuf); + get_text_content(tdoc, node, &titlebuf); + tidyBufPutByte(&titlebuf, '\0'); + title = g_strdup((const gchar*)titlebuf.bp); + tidyBufFree(&titlebuf); + } + + g_strstrip(title); + return title; +} + +void get_text_content(TidyDoc tdoc, TidyNode node, TidyBuffer* buf) { if (tidyNodeGetType(node) == TidyNode_Text) { TidyBuffer content; tidyBufInit(&content); @@ -128,7 +142,7 @@ void getTextContent(TidyDoc tdoc, TidyNode node, TidyBuffer* buf) { } else { TidyNode child; for (child = tidyGetChild(node); child; child = tidyGetNext(child)) { - getTextContent(tdoc, child, buf); + get_text_content(tdoc, child, buf); } } } diff --git a/tests/libwebvi_tests.c b/tests/libwebvi_tests.c index f754157..4d57a94 100644 --- a/tests/libwebvi_tests.c +++ b/tests/libwebvi_tests.c @@ -41,6 +41,10 @@ int main(int argc, char** argv) g_test_add("/linkextractor/html_title", LinkExtractorFixture, 0, link_extractor_fixture_setup, test_link_extractor_html_title, link_extractor_fixture_teardown); + g_test_add("/linkextractor/html_title", LinkExtractorFixture, 0, + link_extractor_fixture_setup, + test_link_extractor_title_overrides_content, + link_extractor_fixture_teardown); g_test_add_func("/menubuilder/mainmenu", test_mainmenu); g_test_add("/menubuilder/title", MenuBuilderFixture, 0, diff --git a/tests/linkextractor_tests.c b/tests/linkextractor_tests.c index 2aa9166..60deba0 100644 --- a/tests/linkextractor_tests.c +++ b/tests/linkextractor_tests.c @@ -38,6 +38,12 @@ "<a href=\"" HTML5_HREF "\"><span><b> Test</b></span> <span>link</span></a>" \ "</body></html>" +#define HTML6_HREF "http://example.com/test/link" +#define HTML6_TITLE "Test link" +#define HTML6 "<html><body>" \ + "<a href=\"" HTML6_HREF "\" title=\"" HTML6_TITLE "\">ignored</a>" \ + "</body></html>" + void link_extractor_fixture_setup(LinkExtractorFixture *fixture, gconstpointer test_data) { @@ -152,7 +158,20 @@ void test_link_extractor_html_title(LinkExtractorFixture *fixture, g_ptr_array_free(links, TRUE); } -void test_link_extractor_xml(LinkExtractorFixture *fixture, - gconstpointer test_data) { - +void test_link_extractor_title_overrides_content( + LinkExtractorFixture *fixture, G_GNUC_UNUSED gconstpointer test_data) +{ + GPtrArray *links; + link_extractor_append(fixture->extractor, HTML6, strlen(HTML6)); + links = link_extractor_get_links(fixture->extractor); + g_assert(links); + g_assert(links->len == 1); + const struct Link *link = g_ptr_array_index(links, 0); + const char *href = link_get_href(link); + g_assert(href); + g_assert(strcmp(href, HTML6_HREF) == 0); + const char *title = link_get_title(link); + g_assert(title); + g_assert(strcmp(title, HTML6_TITLE) == 0); + g_ptr_array_free(links, TRUE); } diff --git a/tests/linkextractor_tests.h b/tests/linkextractor_tests.h index 49d2c7f..62cc52f 100644 --- a/tests/linkextractor_tests.h +++ b/tests/linkextractor_tests.h @@ -25,10 +25,8 @@ void test_link_extractor_invalid_html(LinkExtractorFixture *fixture, void test_link_extractor_relative_urls(LinkExtractorFixture *fixture, gconstpointer test_data); void test_link_extractor_html_title(LinkExtractorFixture *fixture, - G_GNUC_UNUSED gconstpointer test_data); - -void test_link_extractor_xml(LinkExtractorFixture *fixture, - gconstpointer test_data); - - + gconstpointer test_data); +void test_link_extractor_title_overrides_content(LinkExtractorFixture *fixture, + gconstpointer test_data); + #endif // LINK_EXTRACTOR_TESTS_H |