summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatti Lehtimäki <matti.lehtimaki@gmail.com>2012-09-08 00:13:23 +0300
committerMatti Lehtimäki <matti.lehtimaki@gmail.com>2012-09-08 00:13:23 +0300
commit823ced0e58385c959dc9de8a4621004f6e2d5ce0 (patch)
treef3d622596151470420613c486d62e4d1c21a8b85
parentb94072b545e08d3b06d614267c42eced82bf50a8 (diff)
downloadvdr-plugin-epgfixer-823ced0e58385c959dc9de8a4621004f6e2d5ce0.tar.gz
vdr-plugin-epgfixer-823ced0e58385c959dc9de8a4621004f6e2d5ce0.tar.bz2
Add support for Perl-style operators 's///' and 'm//' and modifiers 'gimsuxX'.
-rw-r--r--HISTORY7
-rw-r--r--Makefile2
-rw-r--r--README16
-rw-r--r--epgfixer/regexp.conf6
-rw-r--r--regexp.c243
-rw-r--r--regexp.h4
6 files changed, 217 insertions, 61 deletions
diff --git a/HISTORY b/HISTORY
index 327108a..4edf609 100644
--- a/HISTORY
+++ b/HISTORY
@@ -1,6 +1,13 @@
VDR Plugin 'epgfixer' Revision History
--------------------------------------
+2012-09-xx: Version 0.3.0
+
+- Enable JIT compiling in PCRE if supported.
+- Support for channel intervals for channel numbers.
+- Support search-and-replace using Perl-style 's///' operator.
+- Support Perl-style modifiers when using 's///' or 'm//' operators.
+
2012-05-13: Version 0.2.1
- Fix linking when --as-needed is used.
diff --git a/Makefile b/Makefile
index cb70ac6..5f34a9b 100644
--- a/Makefile
+++ b/Makefile
@@ -29,8 +29,6 @@ TMPDIR ?= /tmp
### Regexp
ifeq (exists, $(shell pkg-config libpcre && echo exists))
REGEXLIB = pcre
-else ifeq (exists, $(shell pkg-config tre && echo exists))
- REGEXLIB = tre
endif
### Make sure that necessary options are included:
diff --git a/README b/README
index 0f64d19..783707b 100644
--- a/README
+++ b/README
@@ -64,11 +64,27 @@ General syntax of configuration files:
Syntax of regexp.conf line is "Channel_list:Parsed_epg_field=Regexp" with:
- Parsed_epg_field is the EPG field for which the regular expression is applied
with available field names title, shorttext and description.
+- Regular expressions can be used in different ways: Perl-style
+ 's/PATTERN/REPLACEMENT/' and 'm/PATTERN/' operators or simply using 'PATTERN'
+ of which the latter two use named backreferences.
- Regular expressions use named backreferences with either title, shorttext,
description or rating (parental rating).
- By prepending "a" or "p" to backreference name (except rating field) the
back referenced string is either appended or prepended to the original
content of the target EPG field, respectively.
+- Perl-style operator 's/PATTERN/REPLACEMENT/' replaces match of PATTERN with
+ REPLACEMENT in the EPG field and cannot be used to transfer content to other
+ EPG fields.
+- Perl-style operator 'm//' operates using backreferences similarly to when not
+ using Perl-style operator but adds possibility of using modifiers.
+- One or more modifiers can be appended to Perl-style operators:
+ - g = Global matching (only with 's///')
+ - i = Case-insensitive pattern matching.
+ - m = Treat string as multiple lines.
+ - s = Dot matches newlines.
+ - u = Handles UTF8 characters.
+ - x = Ignore white spaces. Comments in regular expression using (?#comment).
+ - X = Strict escape parsing.
- Several regular expressions may be applied to same field.
Syntax of charset.conf line is "Channel_list:BroadcastCharset=OriginalCharSet"
diff --git a/epgfixer/regexp.conf b/epgfixer/regexp.conf
index ab36f5d..f1b6f50 100644
--- a/epgfixer/regexp.conf
+++ b/epgfixer/regexp.conf
@@ -15,3 +15,9 @@
# Move parental rating from start of description:
# (12) Lorem ipsum ...
#description=^[(](?<rating>[0-9S]{1,2})[)][ ]+(?<description>.*)
+
+# Replaces every instance of 'foo' with 'bar' in description
+#description=s/foo/bar/g
+
+# Matches 'foo' case-insensitively
+#description=m/foo/i
diff --git a/regexp.c b/regexp.c
index db820ef..919923d 100644
--- a/regexp.c
+++ b/regexp.c
@@ -12,6 +12,10 @@
#define PCRE_STUDY_JIT_COMPILE 0
#endif
+#define OVECCOUNT 33 /* should be a multiple of 3 */
+
+typedef enum { NONE,FIRST,GLOBAL } replace;
+
/* Global instance */
cEpgfixerList<cRegexp, cEvent> EpgfixerRegexps;
@@ -22,7 +26,10 @@ const char *strBackrefs[] = { "atitle","ptitle","title","ashorttext","pshorttext
cRegexp::cRegexp()
{
+ modifiers = 0;
regexp = NULL;
+ replace = NONE;
+ replacement = NULL;
source = REGEXP_UNDEFINED;
re = NULL;
sd = NULL;
@@ -32,6 +39,7 @@ cRegexp::~cRegexp(void)
{
Free();
free(regexp);
+ free(replacement);
FreeCompiled();
}
@@ -40,7 +48,7 @@ void cRegexp::Compile()
FreeCompiled();
const char *error;
int erroffset;
- re = pcre_compile(regexp, 0, &error, &erroffset, NULL);
+ re = pcre_compile(regexp, modifiers, &error, &erroffset, NULL);
if (error) {
error("PCRE compile error: %s at offset %i", error, erroffset);
enabled = false;
@@ -68,19 +76,89 @@ void cRegexp::FreeCompiled()
}
}
+// Split the regexp part of a configuration line into pattern, replacement
+// and PCRE compile options.
+//
+// Accepted forms:
+//   s/PATTERN/REPLACEMENT/MODIFIERS  - search and replace
+//   m/PATTERN/MODIFIERS              - match using named backreferences
+//   PATTERN                          - match using named backreferences
+//
+// restring is modified in place (delimiters are overwritten with NUL).
+// Results are stored in the members regexp, replacement, replace and
+// modifiers; this function only sets them, it does not free or reset
+// previous values. If an 's///' expression has no delimiter between
+// pattern and replacement, regexp is left NULL and an error is logged.
+void cRegexp::ParseRegexp(char *restring)
+{
+  if (!restring)
+    return;
+  const bool perlstyle = strlen(restring) > 2 && restring[1] == '/' &&
+                         (restring[0] == 'm' || restring[0] == 's');
+  if (!perlstyle) {
+    // plain pattern, use backreferences
+    regexp = strdup(restring);
+    return;
+  }
+  char *pattern = &restring[2];
+  // separate modifiers from end of regexp; search starts after the opening
+  // delimiter so that it can never be mistaken for the closing one
+  char *l = strrchr(pattern, '/');
+  if (l) {
+    *l = 0;
+    // handle all modifiers, unknown characters are ignored
+    for (const char *m = l + 1; *m != 0; m++) {
+      switch (*m) {
+        case 'g':
+          if (restring[0] == 's')
+            replace = GLOBAL;
+          break;
+        case 'i':
+          modifiers |= PCRE_CASELESS;
+          break;
+        case 'm':
+          modifiers |= PCRE_MULTILINE;
+          break;
+        case 's':
+          modifiers |= PCRE_DOTALL;
+          break;
+        case 'u':
+          modifiers |= PCRE_UTF8;
+          break;
+        case 'x':
+          modifiers |= PCRE_EXTENDED;
+          break;
+        case 'X':
+          modifiers |= PCRE_EXTRA;
+          break;
+        default:
+          break;
+      }
+    }
+  }
+  if (restring[0] == 'm') {
+    // parse regexp format 'm//'
+    regexp = strdup(pattern);
+    return;
+  }
+  // parse regexp format 's///'
+  if (replace == NONE)
+    replace = FIRST;
+  char *p = pattern;
+  while ((p = strchr(p, '/'))) {
+    // a slash preceded by an odd number of backslashes is escaped
+    int backslashes = 0;
+    for (char *b = p; b > pattern && *(b - 1) == '\\'; b--)
+      backslashes++;
+    if (backslashes % 2 == 0) {
+      *p = 0;
+      regexp = strdup(pattern);
+      // always set a replacement (possibly empty), Apply() formats it with %s
+      replacement = strdup(p + 1);
+      break;
+    }
+    // step over the escaped slash, otherwise strchr() returns it again
+    p++;
+  }
+  if (!regexp) {
+    error("missing delimiter in search-and-replace regexp: s/%s", pattern);
+  }
+}
+
void cRegexp::SetFromString(char *s, bool Enabled)
{
+ modifiers = 0;
FREE(regexp);
+ replace = NONE;
+ FREE(replacement);
Free();
FreeCompiled();
enabled = Enabled;
bool compile = true;
+ // comments are not analysed
if (s[0] == '#') {
enabled = false;
source = REGEXP_UNDEFINED;
string = strdup(s);
return;
}
+ // inactive regexps
if (s[0] == '!') {
enabled = compile = false;
string = strdup(s+1);
@@ -90,9 +168,10 @@ void cRegexp::SetFromString(char *s, bool Enabled)
char *p = strchr(s, '=');
if (p) {
*p = 0;
- regexp = strdup(p + 1);
+ ParseRegexp(p + 1);
char *chanfield = (s[0] == '!') ? s+1 : s;
char *field = chanfield;
+ // find active channels list
char *f = strchr(chanfield, ':');
if (f) {
*f = 0;
@@ -115,68 +194,114 @@ bool cRegexp::Apply(cEvent *Event)
if (enabled && re && IsActive(Event->ChannelID())) {
cString tmpstring;
switch (source) {
- case REGEXP_TITLE:
- tmpstring = Event->Title();
- break;
- case REGEXP_SHORTTEXT:
- tmpstring = Event->ShortText();
- break;
- case REGEXP_DESCRIPTION:
- tmpstring = Event->Description();
- break;
- default:
- tmpstring = "";
- break;
- }
+ case REGEXP_TITLE:
+ tmpstring = Event->Title();
+ break;
+ case REGEXP_SHORTTEXT:
+ tmpstring = Event->ShortText();
+ break;
+ case REGEXP_DESCRIPTION:
+ tmpstring = Event->Description();
+ break;
+ default:
+ tmpstring = "";
+ break;
+ }
if (!*tmpstring)
tmpstring = "";
- const char *string;
- int ovector[20];
- int rc;
- rc = pcre_exec(re, sd, *tmpstring, strlen(*tmpstring), 0, 0, ovector, 20);
- if (rc > 0) {
- int i = 0;
- while (i < 10) {
- if (pcre_get_named_substring(re, tmpstring, ovector, rc, strBackrefs[i], &string) != PCRE_ERROR_NOSUBSTRING) {
- switch (i) {
- case TITLE:
- Event->SetTitle(string);
- break;
- case ATITLE:
- Event->SetTitle(*cString::sprintf("%s %s", Event->Title(), string));
- break;
- case PTITLE:
- Event->SetTitle(*cString::sprintf("%s %s", string, Event->Title()));
+ int ovector[OVECCOUNT];
+ int rc = 0;
+ if (replace != NONE) {// find and replace
+ int last_match_end = -1;
+ int options = 0;
+ int start_offset = 0;
+ int tmpstringlen = strlen(*tmpstring);
+ cString resultstring = "";
+ // loop through matches
+ while ((rc = pcre_exec(re, sd, *tmpstring, tmpstringlen, start_offset, options, ovector, OVECCOUNT)) > 0) {
+ last_match_end = ovector[1];
+ resultstring = cString::sprintf("%s%.*s%s", *resultstring, ovector[0]-start_offset, &tmpstring[start_offset], replacement);
+ options = 0;
+ if (ovector[0] == ovector[1]) {
+ if (ovector[0] == tmpstringlen)
+ break;
+ options = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
+ }
+ if (replace == FIRST) // only first match wanted
break;
- case SHORTTEXT:
- Event->SetShortText(string);
- break;
- case ASHORTTEXT:
- Event->SetShortText(*cString::sprintf("%s %s", Event->ShortText(), string));
- break;
- case PSHORTTEXT:
- Event->SetShortText(*cString::sprintf("%s %s", string, Event->ShortText()));
- break;
- case DESCRIPTION:
- Event->SetDescription(string);
- break;
- case ADESCRIPTION:
- Event->SetDescription(*cString::sprintf("%s %s", Event->Description(), string));
- break;
- case PDESCRIPTION:
- Event->SetDescription(*cString::sprintf("%s %s", string, Event->Description()));
- break;
- case RATING:
- Event->SetParentalRating(atoi(string));
- break;
- default:
- break;
- }
- pcre_free_substring(string);
+ start_offset = ovector[1];
+ }
+ // replace EPG field if regexp matched
+ if (**resultstring && (last_match_end < tmpstringlen-1)) {
+ resultstring = cString::sprintf("%s%s", *resultstring, tmpstring+last_match_end);
+ switch (source) {
+ case REGEXP_TITLE:
+ Event->SetTitle(resultstring);
+ break;
+ case REGEXP_SHORTTEXT:
+ Event->SetShortText(resultstring);
+ break;
+ case REGEXP_DESCRIPTION:
+ Event->SetDescription(resultstring);
+ break;
+ default:
+ break;
}
- ++i;
+ return true;
+ }
+ }
+ else {// use backreferences
+ const char *string;
+ rc = pcre_exec(re, sd, *tmpstring, strlen(*tmpstring), 0, 0, ovector, OVECCOUNT);
+ if (rc == 0) {
+ error("maximum number of captured substrings is %d\n", OVECCOUNT/3 - 1);
+ }
+ else if (rc > 0) {
+ int i = 0;
+ // loop through all possible backreferences
+ // TODO allow duplicate backreference names?
+ while (i < 10) {
+ if (pcre_get_named_substring(re, tmpstring, ovector, rc, strBackrefs[i], &string) != PCRE_ERROR_NOSUBSTRING) {
+ switch (i) {
+ case TITLE:
+ Event->SetTitle(string);
+ break;
+ case ATITLE:
+ Event->SetTitle(*cString::sprintf("%s %s", Event->Title(), string));
+ break;
+ case PTITLE:
+ Event->SetTitle(*cString::sprintf("%s %s", string, Event->Title()));
+ break;
+ case SHORTTEXT:
+ Event->SetShortText(string);
+ break;
+ case ASHORTTEXT:
+ Event->SetShortText(*cString::sprintf("%s %s", Event->ShortText(), string));
+ break;
+ case PSHORTTEXT:
+ Event->SetShortText(*cString::sprintf("%s %s", string, Event->ShortText()));
+ break;
+ case DESCRIPTION:
+ Event->SetDescription(string);
+ break;
+ case ADESCRIPTION:
+ Event->SetDescription(*cString::sprintf("%s %s", Event->Description(), string));
+ break;
+ case PDESCRIPTION:
+ Event->SetDescription(*cString::sprintf("%s %s", string, Event->Description()));
+ break;
+ case RATING:
+ Event->SetParentalRating(atoi(string));
+ break;
+ default:
+ break;
+ }
+ pcre_free_substring(string);
+ }
+ ++i;
+ }
+ return true;
}
- return true;
}
}
return false;
diff --git a/regexp.h b/regexp.h
index 67270c2..970091a 100644
--- a/regexp.h
+++ b/regexp.h
@@ -21,11 +21,15 @@ class cRegexp : public cListItem
{
private:
char *regexp;
+ char *replacement;
+ int replace;
+ int modifiers;
int source;
pcre *re;
pcre_extra *sd;
void Compile();
void FreeCompiled();
+ void ParseRegexp(char *restring);
public:
cRegexp();
virtual ~cRegexp();