/** * @file explode.c * @author Ambroz Bizjak * * @section LICENSE * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the author nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * @section DESCRIPTION * * * Synopsis: * compile_search(string str) * * Description: * Performs calculations to enable efficient string searches for the * given string (builds the KMP table). The string must be non-empty. * * * Synopsis: * explode(string delimiter, string input [, string limit]) * compile_search::explode(string input [, string limit]) * * Description: * Splits the string 'input' into a list of components. The first component * is the part of 'input' until the first occurence of 'delimiter', if any. * If 'delimiter' was found, the remaining components are defined recursively * via the same procedure, starting with the part of 'input' after the first * substring. * 'delimiter' must be nonempty. * The compile_search variant uses an precompiled delimiter string for better * performance. * * Variables: * list (empty) - the components of 'input', determined based on 'delimiter' */ #include #include #include #include #include #include #include #include #include struct compile_search_instance { NCDModuleInst *i; MemRef str; size_t *table; }; struct instance { NCDModuleInst *i; NCDValRef input; struct ExpArray arr; size_t num; }; static void compile_search_new (void *vo, NCDModuleInst *i, const struct NCDModuleInst_new_params *params) { struct compile_search_instance *o = vo; o->i = i; NCDValRef str_arg; if (!NCDVal_ListRead(params->args, 1, &str_arg)) { ModuleLog(i, BLOG_ERROR, "wrong arity"); goto fail0; } if (!NCDVal_IsString(str_arg)) { ModuleLog(i, BLOG_ERROR, "wrong type"); goto fail0; } o->str = NCDVal_StringMemRef(str_arg); if (o->str.len == 0) { ModuleLog(i, BLOG_ERROR, "string must be nonempty"); goto fail0; } o->table = BAllocArray(o->str.len, sizeof(o->table[0])); if (!o->table) { ModuleLog(i, BLOG_ERROR, "BAllocArray failed"); goto fail0; } build_substring_backtrack_table(o->str, o->table); NCDModuleInst_Backend_Up(i); return; fail0: NCDModuleInst_Backend_DeadError(i); } static void compile_search_die (void *vo) { struct compile_search_instance *o = vo; BFree(o->table); NCDModuleInst_Backend_Dead(o->i); } static void func_new_common (void *vo, NCDModuleInst *i, const struct NCDModuleInst_new_params *params, struct compile_search_instance *compiled) { struct instance *o = vo; o->i = i; int arg_start; MemRef del; size_t const *table; if (compiled) { arg_start = 0; del = compiled->str; table = compiled->table; } else { NCDValRef delimiter_arg; if (!NCDVal_ListReadHead(params->args, 1, &delimiter_arg)) { ModuleLog(i, BLOG_ERROR, "missing delimiter argument"); goto fail0; } if (!NCDVal_IsString(delimiter_arg)) { ModuleLog(i, BLOG_ERROR, "wrong delimiter type"); goto fail0; } arg_start = 1; del = NCDVal_StringMemRef(delimiter_arg); if (del.len == 0) { ModuleLog(i, BLOG_ERROR, "delimiter must be nonempty"); goto fail0; } table = BAllocArray(del.len, sizeof(table[0])); if (!table) { ModuleLog(i, BLOG_ERROR, "ExpArray_init failed"); goto fail0; } build_substring_backtrack_table(del, (size_t *)table); } // read arguments NCDValRef input_arg; NCDValRef limit_arg = NCDVal_NewInvalid(); if (!NCDVal_ListReadStart(params->args, arg_start, 1, &input_arg) && !NCDVal_ListReadStart(params->args, arg_start, 2, &input_arg, &limit_arg)) { ModuleLog(i, BLOG_ERROR, "wrong arity"); goto fail1; } if (!NCDVal_IsString(input_arg)) { ModuleLog(i, BLOG_ERROR, "wrong type"); goto fail1; } o->input = input_arg; size_t limit = SIZE_MAX; if (!NCDVal_IsInvalid(limit_arg)) { uintmax_t n; if (!ncd_read_uintmax(limit_arg, &n) || n == 0) { ModuleLog(i, BLOG_ERROR, "bad limit argument"); goto fail1; } n--; limit = (n <= SIZE_MAX ? n : SIZE_MAX); } if (!ExpArray_init(&o->arr, sizeof(MemRef), 8)) { ModuleLog(i, BLOG_ERROR, "ExpArray_init failed"); goto fail1; } o->num = 0; MemRef data = NCDVal_StringMemRef(input_arg); while (1) { size_t start; int is_end = 0; if (limit == 0 || !find_substring(data, del, table, &start)) { start = data.len; is_end = 1; } if (!ExpArray_resize(&o->arr, o->num + 1)) { ModuleLog(i, BLOG_ERROR, "ExpArray_init failed"); goto fail2; } ((MemRef *)o->arr.v)[o->num] = MemRef_SubTo(data, start); o->num++; if (is_end) { break; } data = MemRef_SubFrom(data, start + del.len); limit--; } if (!compiled) { BFree((size_t *)table); } // signal up NCDModuleInst_Backend_Up(i); return; fail2: free(o->arr.v); fail1: if (!compiled) { BFree((size_t *)table); } fail0: NCDModuleInst_Backend_DeadError(i); } static void func_new (void *vo, NCDModuleInst *i, const struct NCDModuleInst_new_params *params) { return func_new_common(vo, i, params, NULL); } static void func_new_compiled (void *vo, NCDModuleInst *i, const struct NCDModuleInst_new_params *params) { struct compile_search_instance *compiled = NCDModuleInst_Backend_GetUser((NCDModuleInst *)params->method_user); return func_new_common(vo, i, params, compiled); } static void func_die (void *vo) { struct instance *o = vo; free(o->arr.v); NCDModuleInst_Backend_Dead(o->i); } static int func_getvar2 (void *vo, NCD_string_id_t name, NCDValMem *mem, NCDValRef *out) { struct instance *o = vo; if (name == NCD_STRING_EMPTY) { *out = NCDVal_NewList(mem, o->num); if (NCDVal_IsInvalid(*out)) { goto fail; } int is_external = NCDVal_IsExternalString(o->input); for (size_t j = 0; j < o->num; j++) { MemRef elem = ((MemRef *)o->arr.v)[j]; NCDValRef str; if (is_external) { str = NCDVal_NewExternalString(mem, elem.ptr, elem.len, NCDVal_ExternalStringTarget(o->input)); } else { str = NCDVal_NewStringBinMr(mem, elem); } if (NCDVal_IsInvalid(str)) { goto fail; } if (!NCDVal_ListAppend(*out, str)) { goto fail; } } return 1; } return 0; fail: *out = NCDVal_NewInvalid(); return 1; } static struct NCDModule modules[] = { { .type = "compile_search", .func_new2 = compile_search_new, .func_die = compile_search_die, .alloc_size = sizeof(struct compile_search_instance) }, { .type = "explode", .func_new2 = func_new, .func_die = func_die, .func_getvar2 = func_getvar2, .alloc_size = sizeof(struct instance) }, { .type = "compile_search::explode", .func_new2 = func_new_compiled, .func_die = func_die, .func_getvar2 = func_getvar2, .alloc_size = sizeof(struct instance) }, { .type = NULL } }; const struct NCDModuleGroup ncdmodule_explode = { .modules = modules };