| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324 |
- /**
- * @file explode.c
- * @author Ambroz Bizjak <ambrop7@gmail.com>
- *
- * @section LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the author nor the
- * names of its contributors may be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * @section DESCRIPTION
- *
- *
- * Synopsis:
- * compile_search(string str)
- *
- * Description:
- * Performs calculations to enable efficient string searches for the
- * given string (builds the KMP table). The string must be non-empty.
- *
- *
- * Synopsis:
- * explode(string delimiter, string input [, string limit])
- * compile_search::explode(string input [, string limit])
- *
- * Description:
- * Splits the string 'input' into a list of components. The first component
- * is the part of 'input' until the first occurence of 'delimiter', if any.
- * If 'delimiter' was found, the remaining components are defined recursively
- * via the same procedure, starting with the part of 'input' after the first
- * substring.
- * 'delimiter' must be nonempty.
- * The compile_search variant uses an precompiled delimiter string for better
- * performance.
- *
- * Variables:
- * list (empty) - the components of 'input', determined based on 'delimiter'
- */
- #include <stdlib.h>
- #include <string.h>
- #include <limits.h>
- #include <misc/exparray.h>
- #include <misc/string_begins_with.h>
- #include <misc/substring.h>
- #include <misc/balloc.h>
- #include <ncd/module_common.h>
- #include <generated/blog_channel_ncd_explode.h>
- struct compile_search_instance {
- NCDModuleInst *i;
- MemRef str;
- size_t *table;
- };
- struct instance {
- NCDModuleInst *i;
- NCDValRef input;
- struct ExpArray arr;
- size_t num;
- };
- static void compile_search_new (void *vo, NCDModuleInst *i, const struct NCDModuleInst_new_params *params)
- {
- struct compile_search_instance *o = vo;
- o->i = i;
-
- NCDValRef str_arg;
- if (!NCDVal_ListRead(params->args, 1, &str_arg)) {
- ModuleLog(i, BLOG_ERROR, "wrong arity");
- goto fail0;
- }
- if (!NCDVal_IsString(str_arg)) {
- ModuleLog(i, BLOG_ERROR, "wrong type");
- goto fail0;
- }
-
- o->str = NCDVal_StringMemRef(str_arg);
- if (o->str.len == 0) {
- ModuleLog(i, BLOG_ERROR, "string must be nonempty");
- goto fail0;
- }
-
- o->table = BAllocArray(o->str.len, sizeof(o->table[0]));
- if (!o->table) {
- ModuleLog(i, BLOG_ERROR, "BAllocArray failed");
- goto fail0;
- }
-
- build_substring_backtrack_table(o->str, o->table);
-
- NCDModuleInst_Backend_Up(i);
- return;
-
- fail0:
- NCDModuleInst_Backend_DeadError(i);
- }
- static void compile_search_die (void *vo)
- {
- struct compile_search_instance *o = vo;
-
- BFree(o->table);
-
- NCDModuleInst_Backend_Dead(o->i);
- }
- static void func_new_common (void *vo, NCDModuleInst *i, const struct NCDModuleInst_new_params *params, struct compile_search_instance *compiled)
- {
- struct instance *o = vo;
- o->i = i;
-
- int arg_start;
- MemRef del;
- size_t const *table;
-
- if (compiled) {
- arg_start = 0;
- del = compiled->str;
- table = compiled->table;
- } else {
- NCDValRef delimiter_arg;
- if (!NCDVal_ListReadHead(params->args, 1, &delimiter_arg)) {
- ModuleLog(i, BLOG_ERROR, "missing delimiter argument");
- goto fail0;
- }
- if (!NCDVal_IsString(delimiter_arg)) {
- ModuleLog(i, BLOG_ERROR, "wrong delimiter type");
- goto fail0;
- }
- arg_start = 1;
-
- del = NCDVal_StringMemRef(delimiter_arg);
- if (del.len == 0) {
- ModuleLog(i, BLOG_ERROR, "delimiter must be nonempty");
- goto fail0;
- }
-
- table = BAllocArray(del.len, sizeof(table[0]));
- if (!table) {
- ModuleLog(i, BLOG_ERROR, "ExpArray_init failed");
- goto fail0;
- }
-
- build_substring_backtrack_table(del, (size_t *)table);
- }
-
- // read arguments
- NCDValRef input_arg;
- NCDValRef limit_arg = NCDVal_NewInvalid();
- if (!NCDVal_ListReadStart(params->args, arg_start, 1, &input_arg) && !NCDVal_ListReadStart(params->args, arg_start, 2, &input_arg, &limit_arg)) {
- ModuleLog(i, BLOG_ERROR, "wrong arity");
- goto fail1;
- }
- if (!NCDVal_IsString(input_arg)) {
- ModuleLog(i, BLOG_ERROR, "wrong type");
- goto fail1;
- }
- o->input = input_arg;
-
- size_t limit = SIZE_MAX;
- if (!NCDVal_IsInvalid(limit_arg)) {
- uintmax_t n;
- if (!ncd_read_uintmax(limit_arg, &n) || n == 0) {
- ModuleLog(i, BLOG_ERROR, "bad limit argument");
- goto fail1;
- }
- n--;
- limit = (n <= SIZE_MAX ? n : SIZE_MAX);
- }
-
- if (!ExpArray_init(&o->arr, sizeof(MemRef), 8)) {
- ModuleLog(i, BLOG_ERROR, "ExpArray_init failed");
- goto fail1;
- }
- o->num = 0;
-
- MemRef data = NCDVal_StringMemRef(input_arg);
-
- while (1) {
- size_t start;
- int is_end = 0;
- if (limit == 0 || !find_substring(data, del, table, &start)) {
- start = data.len;
- is_end = 1;
- }
-
- if (!ExpArray_resize(&o->arr, o->num + 1)) {
- ModuleLog(i, BLOG_ERROR, "ExpArray_init failed");
- goto fail2;
- }
-
- ((MemRef *)o->arr.v)[o->num] = MemRef_SubTo(data, start);
- o->num++;
-
- if (is_end) {
- break;
- }
-
- data = MemRef_SubFrom(data, start + del.len);
- limit--;
- }
-
- if (!compiled) {
- BFree((size_t *)table);
- }
-
- // signal up
- NCDModuleInst_Backend_Up(i);
- return;
- fail2:
- free(o->arr.v);
- fail1:
- if (!compiled) {
- BFree((size_t *)table);
- }
- fail0:
- NCDModuleInst_Backend_DeadError(i);
- }
- static void func_new (void *vo, NCDModuleInst *i, const struct NCDModuleInst_new_params *params)
- {
- return func_new_common(vo, i, params, NULL);
- }
- static void func_new_compiled (void *vo, NCDModuleInst *i, const struct NCDModuleInst_new_params *params)
- {
- struct compile_search_instance *compiled = NCDModuleInst_Backend_GetUser((NCDModuleInst *)params->method_user);
-
- return func_new_common(vo, i, params, compiled);
- }
- static void func_die (void *vo)
- {
- struct instance *o = vo;
-
- free(o->arr.v);
-
- NCDModuleInst_Backend_Dead(o->i);
- }
- static int func_getvar2 (void *vo, NCD_string_id_t name, NCDValMem *mem, NCDValRef *out)
- {
- struct instance *o = vo;
-
- if (name == NCD_STRING_EMPTY) {
- *out = NCDVal_NewList(mem, o->num);
- if (NCDVal_IsInvalid(*out)) {
- goto fail;
- }
- int is_external = NCDVal_IsExternalString(o->input);
- for (size_t j = 0; j < o->num; j++) {
- MemRef elem = ((MemRef *)o->arr.v)[j];
- NCDValRef str;
- if (is_external) {
- str = NCDVal_NewExternalString(mem, elem.ptr, elem.len, NCDVal_ExternalStringTarget(o->input));
- } else {
- str = NCDVal_NewStringBinMr(mem, elem);
- }
- if (NCDVal_IsInvalid(str)) {
- goto fail;
- }
- if (!NCDVal_ListAppend(*out, str)) {
- goto fail;
- }
- }
- return 1;
- }
-
- return 0;
-
- fail:
- *out = NCDVal_NewInvalid();
- return 1;
- }
- static struct NCDModule modules[] = {
- {
- .type = "compile_search",
- .func_new2 = compile_search_new,
- .func_die = compile_search_die,
- .alloc_size = sizeof(struct compile_search_instance)
- }, {
- .type = "explode",
- .func_new2 = func_new,
- .func_die = func_die,
- .func_getvar2 = func_getvar2,
- .alloc_size = sizeof(struct instance)
- }, {
- .type = "compile_search::explode",
- .func_new2 = func_new_compiled,
- .func_die = func_die,
- .func_getvar2 = func_getvar2,
- .alloc_size = sizeof(struct instance)
- }, {
- .type = NULL
- }
- };
- const struct NCDModuleGroup ncdmodule_explode = {
- .modules = modules
- };
|