Quellcode durchsuchen

ncd: modules: regex_match: add regex_replace()

ambrop7 vor 14 Jahren
Ursprung
Commit
fc3608bfcf
1 geänderte Dateien mit 218 neuen und 0 gelöschten Zeilen
  1. 218 0
      ncd/modules/regex_match.c

+ 218 - 0
ncd/modules/regex_match.c

@@ -46,6 +46,18 @@
  *   The input and regex strings are interpreted according to the POSIX regex functions
  *   (regcomp(), regexec()); in particular, the current locale setting affects the
  *   interpretation.
+ * 
+ * Synopsis:
+ *   regex_replace(string input, list(string) regex, list(string) replace)
+ * 
+ * Variables:
+ *   string (empty) - transformed input
+ * 
+ * Description:
+ *   Replaces matching parts of the input string. Replacement is performed one regular
+ *   expression after another: starting with the input string, for each given regular
+ *   expression, matching substrings of the current string are replaced with the
+ *   corresponding replacement string. The regex and replace lists must have the same
+ *   number of elements.
  */
 
 #include <stdlib.h>
@@ -55,6 +67,8 @@
 
 #include <misc/string_begins_with.h>
 #include <misc/parse_number.h>
+#include <misc/expstring.h>
+#include <misc/debug.h>
 #include <ncd/NCDModule.h>
 
 #include <generated/blog_channel_ncd_regex_match.h>
@@ -72,6 +86,90 @@ struct instance {
     regmatch_t matches[MAX_MATCHES];
 };
 
+struct replace_instance { // state of one regex_replace() statement instance
+    NCDModuleInst *i; // module instance this state belongs to
+    char *output; // transformed text, exposed through the "" variable
+    size_t output_len; // length of output in bytes
+    int output_free; // nonzero if output must be released with free() on die
+};
+
+/**
+ * Replaces every match of a POSIX extended regular expression within a
+ * length-delimited input buffer by a fixed replacement.
+ *
+ * Matching proceeds left to right over non-overlapping matches. A match of
+ * the empty string inserts the replacement and then copies one input byte
+ * through unchanged, so that scanning always makes progress. The '^' anchor
+ * can only match at the real beginning of the input.
+ *
+ * @param input input data; bounded by input_len via REG_STARTEND
+ * @param input_len length of the input in bytes; must not exceed INT_MAX
+ * @param regex null-terminated regular expression, compiled with REG_EXTENDED
+ * @param replace data inserted in place of each match
+ * @param replace_len length of the replacement in bytes
+ * @param out_output on success, receives the result buffer obtained from the
+ *                   ExpString; the caller in this file releases it with free()
+ * @param out_output_len on success, receives the length of the result
+ * @param i module instance, used for logging
+ * @return 1 on success, 0 on failure (an error has been logged)
+ */
+static int regex_replace (const char *input, size_t input_len, const char *regex, const char *replace, size_t replace_len, char **out_output, size_t *out_output_len, NCDModuleInst *i)
+{
+    int res = 0;
+    
+    // make sure we don't overflow regoff_t
+    if (input_len > INT_MAX) {
+        ModuleLog(i, BLOG_ERROR, "string is too long");
+        goto fail0;
+    }
+    
+    // compile regex
+    regex_t preg;
+    int ret;
+    if ((ret = regcomp(&preg, regex, REG_EXTENDED)) != 0) {
+        ModuleLog(i, BLOG_ERROR, "regcomp failed (error=%d)", ret);
+        goto fail0;
+    }
+    
+    // init output string
+    ExpString str;
+    if (!ExpString_Init(&str)) {
+        ModuleLog(i, BLOG_ERROR, "ExpString_Init failed");
+        goto fail1;
+    }
+    
+    // Only the first regexec() call starts at the real beginning of the input.
+    // Later calls receive an advanced pointer and get REG_NOTBOL added, so that
+    // '^' does not match again in the middle of the original string.
+    int eflags = REG_STARTEND;
+    
+    while (1) {
+        // execute match
+        regmatch_t matches[MAX_MATCHES];
+        matches[0].rm_so = 0;
+        matches[0].rm_eo = input_len;
+        if (regexec(&preg, input, MAX_MATCHES, matches, eflags) != 0) {
+            break;
+        }
+        eflags |= REG_NOTBOL;
+        
+        ASSERT(matches[0].rm_so >= 0)
+        ASSERT(matches[0].rm_eo >= matches[0].rm_so)
+        
+        // work with unsigned offsets from here on to avoid mixed-sign comparisons
+        size_t match_start = matches[0].rm_so;
+        size_t match_end = matches[0].rm_eo;
+        ASSERT(match_end <= input_len)
+        
+        // append data before match
+        if (!ExpString_AppendBinary(&str, input, match_start)) {
+            ModuleLog(i, BLOG_ERROR, "ExpString_AppendBinary failed");
+            goto fail2;
+        }
+        
+        // append replace data
+        if (!ExpString_AppendBinary(&str, replace, replace_len)) {
+            ModuleLog(i, BLOG_ERROR, "ExpString_AppendBinary failed");
+            goto fail2;
+        }
+        
+        if (match_end == match_start) {
+            // Empty match: resuming at the same position would find the same
+            // empty match again and loop forever.
+            if (match_end == input_len) {
+                // empty match at the very end, nothing left to scan
+                input += match_end;
+                input_len = 0;
+                break;
+            }
+            
+            // copy one byte through unchanged and resume after it
+            if (!ExpString_AppendBinary(&str, input + match_end, 1)) {
+                ModuleLog(i, BLOG_ERROR, "ExpString_AppendBinary failed");
+                goto fail2;
+            }
+            match_end++;
+        }
+        
+        // go on matching the rest
+        input += match_end;
+        input_len -= match_end;
+    }
+    
+    // append remaining data
+    if (!ExpString_AppendBinary(&str, input, input_len)) {
+        ModuleLog(i, BLOG_ERROR, "ExpString_AppendBinary failed");
+        goto fail2;
+    }
+    
+    // success; the string buffer is handed over to the caller, so it is only
+    // freed below on the failure paths
+    *out_output = ExpString_Get(&str);
+    *out_output_len = ExpString_Length(&str);
+    res = 1;
+    
+fail2:
+    if (!res) {
+        ExpString_Free(&str);
+    }
+fail1:
+    regfree(&preg);
+fail0:
+    return res;
+}
+
 static void func_new (NCDModuleInst *i)
 {
     // allocate instance
@@ -182,12 +280,132 @@ static int func_getvar (void *vo, const char *name, NCDValue *out)
     return 0;
 }
 
+/**
+ * Constructor of the regex_replace() statement.
+ *
+ * Reads the (input, regex list, replace list) arguments, applies each regex
+ * with its corresponding replacement in list order to the running text, stores
+ * the final text in the instance and reports the statement as up. On any
+ * error the instance is released and the statement is reported as failed and
+ * dead.
+ */
+static void replace_func_new (NCDModuleInst *i)
+{
+    struct replace_instance *o = malloc(sizeof(*o));
+    if (!o) {
+        ModuleLog(i, BLOG_ERROR, "malloc failed");
+        goto fail0;
+    }
+    o->i = i;
+    NCDModuleInst_Backend_SetUser(i, o);
+    
+    // fetch the three arguments
+    NCDValue *input_arg;
+    NCDValue *regex_arg;
+    NCDValue *replace_arg;
+    if (!NCDValue_ListRead(i->args, 3, &input_arg, &regex_arg, &replace_arg)) {
+        ModuleLog(i, BLOG_ERROR, "wrong arity");
+        goto fail1;
+    }
+    if (!(NCDValue_IsString(input_arg) && NCDValue_IsList(regex_arg) && NCDValue_IsList(replace_arg))) {
+        ModuleLog(i, BLOG_ERROR, "wrong type");
+        goto fail1;
+    }
+    
+    // every regex needs exactly one replacement
+    if (NCDValue_ListCount(regex_arg) != NCDValue_ListCount(replace_arg)) {
+        ModuleLog(i, BLOG_ERROR, "number of regex's is not the same as number of replacements");
+        goto fail1;
+    }
+    
+    // The running text initially aliases the input argument; it becomes an
+    // owned heap buffer as soon as the first replacement pass has run.
+    char *text = NCDValue_StringValue(input_arg);
+    size_t text_len = NCDValue_StringLength(input_arg);
+    int text_owned = 0;
+    
+    NCDValue *re = NCDValue_ListFirst(regex_arg);
+    NCDValue *rep = NCDValue_ListFirst(replace_arg);
+    
+    // walk both lists in lockstep
+    for (; re; re = NCDValue_ListNext(regex_arg, re), rep = NCDValue_ListNext(replace_arg, rep)) {
+        // the regex must be a null-free string, the replacement any string
+        if (!(NCDValue_IsStringNoNulls(re) && NCDValue_IsString(rep))) {
+            ModuleLog(i, BLOG_ERROR, "regex/replace element has wrong type");
+            goto fail2;
+        }
+        
+        // run one replacement pass over the running text
+        const char *pattern = NCDValue_StringValue(re);
+        const char *subst = NCDValue_StringValue(rep);
+        size_t subst_len = NCDValue_StringLength(rep);
+        char *next;
+        size_t next_len;
+        if (!regex_replace(text, text_len, pattern, subst, subst_len, &next, &next_len, i)) {
+            goto fail2;
+        }
+        
+        // drop the previous buffer if we own it, then adopt the new one
+        if (text_owned) {
+            free(text);
+        }
+        text = next;
+        text_len = next_len;
+        text_owned = 1;
+    }
+    
+    // publish the result
+    o->output = text;
+    o->output_len = text_len;
+    o->output_free = text_owned;
+    
+    // signal up
+    NCDModuleInst_Backend_Up(o->i);
+    return;
+    
+fail2:
+    if (text_owned) {
+        free(text);
+    }
+fail1:
+    free(o);
+fail0:
+    NCDModuleInst_Backend_SetError(i);
+    NCDModuleInst_Backend_Dead(i);
+}
+
+/**
+ * Destructor of the regex_replace() statement: releases the output buffer if
+ * the instance owns it, frees the instance and reports the statement as dead.
+ */
+static void replace_func_die (void *vo)
+{
+    struct replace_instance *inst = vo;
+    
+    // keep the module handle; it is still needed after inst has been freed
+    NCDModuleInst *module = inst->i;
+    
+    if (inst->output_free) {
+        free(inst->output);
+    }
+    free(inst);
+    
+    NCDModuleInst_Backend_Dead(module);
+}
+
+/**
+ * Variable resolver of the regex_replace() statement. Only the empty variable
+ * name is known; it yields the transformed text as a binary string value.
+ *
+ * @return 1 if the variable was resolved into out, 0 otherwise
+ */
+static int replace_func_getvar (void *vo, const char *name, NCDValue *out)
+{
+    struct replace_instance *inst = vo;
+    
+    // anything but the empty name is unknown here
+    if (strcmp(name, "") != 0) {
+        return 0;
+    }
+    
+    if (NCDValue_InitStringBin(out, inst->output, inst->output_len)) {
+        return 1;
+    }
+    
+    ModuleLog(inst->i, BLOG_ERROR, "NCDValue_InitStringBin failed");
+    return 0;
+}
+
 static const struct NCDModule modules[] = {
     {
         .type = "regex_match",
         .func_new = func_new,
         .func_die = func_die,
         .func_getvar = func_getvar
+    }, {
+        .type = "regex_replace",
+        .func_new = replace_func_new,
+        .func_die = replace_func_die,
+        .func_getvar = replace_func_getvar
     }, {
         .type = NULL
     }