explode.c 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335
  1. /**
  2. * @file explode.c
  3. * @author Ambroz Bizjak <ambrop7@gmail.com>
  4. *
  5. * @section LICENSE
  6. *
  7. * Redistribution and use in source and binary forms, with or without
  8. * modification, are permitted provided that the following conditions are met:
  9. * 1. Redistributions of source code must retain the above copyright
  10. * notice, this list of conditions and the following disclaimer.
  11. * 2. Redistributions in binary form must reproduce the above copyright
  12. * notice, this list of conditions and the following disclaimer in the
  13. * documentation and/or other materials provided with the distribution.
  14. * 3. Neither the name of the author nor the
  15. * names of its contributors may be used to endorse or promote products
  16. * derived from this software without specific prior written permission.
  17. *
  18. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  19. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  20. * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  21. * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
  22. * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  23. * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  24. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  25. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  26. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  27. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  28. *
  29. * @section DESCRIPTION
  30. *
  31. *
  32. * Synopsis:
  33. * compile_search(string str)
  34. *
  35. * Description:
  36. * Performs calculations to enable efficient string searches for the
  37. * given string (builds the KMP table). The string must be non-empty.
  38. *
  39. *
  40. * Synopsis:
  41. * explode(string delimiter, string input [, string limit])
  42. * compile_search::explode(string input [, string limit])
  43. *
  44. * Description:
  45. * Splits the string 'input' into a list of components. The first component
  46. * is the part of 'input' until the first occurrence of 'delimiter', if any.
  47. * If 'delimiter' was found, the remaining components are defined recursively
  48. * via the same procedure, starting with the part of 'input' after the first
  49. * substring.
  50. * 'delimiter' must be nonempty.
  51. * The compile_search variant uses a precompiled delimiter string for better
  52. * performance.
  53. *
  54. * Variables:
  55. * list (empty) - the components of 'input', determined based on 'delimiter'
  56. */
  57. #include <stdlib.h>
  58. #include <string.h>
  59. #include <limits.h>
  60. #include <misc/exparray.h>
  61. #include <misc/string_begins_with.h>
  62. #include <misc/substring.h>
  63. #include <misc/balloc.h>
  64. #include <ncd/module_common.h>
  65. #include <generated/blog_channel_ncd_explode.h>
  66. struct compile_search_instance {
  67. NCDModuleInst *i;
  68. MemRef str;
  69. size_t *table;
  70. };
  71. struct instance {
  72. NCDModuleInst *i;
  73. struct ExpArray arr;
  74. size_t num;
  75. };
  76. struct substring {
  77. char *data;
  78. size_t len;
  79. };
  80. static void compile_search_new (void *vo, NCDModuleInst *i, const struct NCDModuleInst_new_params *params)
  81. {
  82. struct compile_search_instance *o = vo;
  83. o->i = i;
  84. NCDValRef str_arg;
  85. if (!NCDVal_ListRead(params->args, 1, &str_arg)) {
  86. ModuleLog(i, BLOG_ERROR, "wrong arity");
  87. goto fail0;
  88. }
  89. if (!NCDVal_IsString(str_arg)) {
  90. ModuleLog(i, BLOG_ERROR, "wrong type");
  91. goto fail0;
  92. }
  93. o->str = NCDVal_StringMemRef(str_arg);
  94. if (o->str.len == 0) {
  95. ModuleLog(i, BLOG_ERROR, "string must be nonempty");
  96. goto fail0;
  97. }
  98. o->table = BAllocArray(o->str.len, sizeof(o->table[0]));
  99. if (!o->table) {
  100. ModuleLog(i, BLOG_ERROR, "BAllocArray failed");
  101. goto fail0;
  102. }
  103. build_substring_backtrack_table(o->str.ptr, o->str.len, o->table);
  104. NCDModuleInst_Backend_Up(i);
  105. return;
  106. fail0:
  107. NCDModuleInst_Backend_DeadError(i);
  108. }
  109. static void compile_search_die (void *vo)
  110. {
  111. struct compile_search_instance *o = vo;
  112. BFree(o->table);
  113. NCDModuleInst_Backend_Dead(o->i);
  114. }
  115. static void func_new_common (void *vo, NCDModuleInst *i, const struct NCDModuleInst_new_params *params, struct compile_search_instance *compiled)
  116. {
  117. struct instance *o = vo;
  118. o->i = i;
  119. int arg_start;
  120. MemRef del;
  121. size_t const *table;
  122. if (compiled) {
  123. arg_start = 0;
  124. del = compiled->str;
  125. table = compiled->table;
  126. } else {
  127. NCDValRef delimiter_arg;
  128. if (!NCDVal_ListReadHead(params->args, 1, &delimiter_arg)) {
  129. ModuleLog(i, BLOG_ERROR, "missing delimiter argument");
  130. goto fail0;
  131. }
  132. if (!NCDVal_IsString(delimiter_arg)) {
  133. ModuleLog(i, BLOG_ERROR, "wrong delimiter type");
  134. goto fail0;
  135. }
  136. arg_start = 1;
  137. del = NCDVal_StringMemRef(delimiter_arg);
  138. if (del.len == 0) {
  139. ModuleLog(i, BLOG_ERROR, "delimiter must be nonempty");
  140. goto fail0;
  141. }
  142. table = BAllocArray(del.len, sizeof(table[0]));
  143. if (!table) {
  144. ModuleLog(i, BLOG_ERROR, "ExpArray_init failed");
  145. goto fail0;
  146. }
  147. build_substring_backtrack_table(del.ptr, del.len, (size_t *)table);
  148. }
  149. // read arguments
  150. NCDValRef input_arg;
  151. NCDValRef limit_arg = NCDVal_NewInvalid();
  152. if (!NCDVal_ListReadStart(params->args, arg_start, 1, &input_arg) && !NCDVal_ListReadStart(params->args, arg_start, 2, &input_arg, &limit_arg)) {
  153. ModuleLog(i, BLOG_ERROR, "wrong arity");
  154. goto fail1;
  155. }
  156. if (!NCDVal_IsString(input_arg)) {
  157. ModuleLog(i, BLOG_ERROR, "wrong type");
  158. goto fail1;
  159. }
  160. size_t limit = SIZE_MAX;
  161. if (!NCDVal_IsInvalid(limit_arg)) {
  162. uintmax_t n;
  163. if (!ncd_read_uintmax(limit_arg, &n) || n == 0) {
  164. ModuleLog(i, BLOG_ERROR, "bad limit argument");
  165. goto fail1;
  166. }
  167. n--;
  168. limit = (n <= SIZE_MAX ? n : SIZE_MAX);
  169. }
  170. if (!ExpArray_init(&o->arr, sizeof(struct substring), 8)) {
  171. ModuleLog(i, BLOG_ERROR, "ExpArray_init failed");
  172. goto fail1;
  173. }
  174. o->num = 0;
  175. MemRef data = NCDVal_StringMemRef(input_arg);
  176. while (1) {
  177. size_t start;
  178. int is_end = 0;
  179. if (limit == 0 || !find_substring(data.ptr, data.len, del.ptr, del.len, table, &start)) {
  180. start = data.len;
  181. is_end = 1;
  182. }
  183. if (!ExpArray_resize(&o->arr, o->num + 1)) {
  184. ModuleLog(i, BLOG_ERROR, "ExpArray_init failed");
  185. goto fail2;
  186. }
  187. struct substring *elem = &((struct substring *)o->arr.v)[o->num];
  188. if (!(elem->data = BAlloc(start))) {
  189. ModuleLog(i, BLOG_ERROR, "BAlloc failed");
  190. goto fail2;
  191. }
  192. memcpy(elem->data, data.ptr, start);
  193. elem->len = start;
  194. o->num++;
  195. if (is_end) {
  196. break;
  197. }
  198. data = MemRef_SubFrom(data, start + del.len);
  199. limit--;
  200. }
  201. if (!compiled) {
  202. BFree((size_t *)table);
  203. }
  204. // signal up
  205. NCDModuleInst_Backend_Up(i);
  206. return;
  207. fail2:
  208. while (o->num-- > 0) {
  209. BFree(((struct substring *)o->arr.v)[o->num].data);
  210. }
  211. free(o->arr.v);
  212. fail1:
  213. if (!compiled) {
  214. BFree((size_t *)table);
  215. }
  216. fail0:
  217. NCDModuleInst_Backend_DeadError(i);
  218. }
  219. static void func_new (void *vo, NCDModuleInst *i, const struct NCDModuleInst_new_params *params)
  220. {
  221. return func_new_common(vo, i, params, NULL);
  222. }
  223. static void func_new_compiled (void *vo, NCDModuleInst *i, const struct NCDModuleInst_new_params *params)
  224. {
  225. struct compile_search_instance *compiled = NCDModuleInst_Backend_GetUser((NCDModuleInst *)params->method_user);
  226. return func_new_common(vo, i, params, compiled);
  227. }
  228. static void func_die (void *vo)
  229. {
  230. struct instance *o = vo;
  231. while (o->num-- > 0) {
  232. BFree(((struct substring *)o->arr.v)[o->num].data);
  233. }
  234. free(o->arr.v);
  235. NCDModuleInst_Backend_Dead(o->i);
  236. }
  237. static int func_getvar2 (void *vo, NCD_string_id_t name, NCDValMem *mem, NCDValRef *out)
  238. {
  239. struct instance *o = vo;
  240. if (name == NCD_STRING_EMPTY) {
  241. *out = NCDVal_NewList(mem, o->num);
  242. if (NCDVal_IsInvalid(*out)) {
  243. goto fail;
  244. }
  245. for (size_t j = 0; j < o->num; j++) {
  246. struct substring *elem = &((struct substring *)o->arr.v)[j];
  247. NCDValRef str = NCDVal_NewStringBin(mem, (uint8_t *)elem->data, elem->len);
  248. if (NCDVal_IsInvalid(str)) {
  249. goto fail;
  250. }
  251. if (!NCDVal_ListAppend(*out, str)) {
  252. goto fail;
  253. }
  254. }
  255. return 1;
  256. }
  257. return 0;
  258. fail:
  259. *out = NCDVal_NewInvalid();
  260. return 1;
  261. }
  262. static struct NCDModule modules[] = {
  263. {
  264. .type = "compile_search",
  265. .func_new2 = compile_search_new,
  266. .func_die = compile_search_die,
  267. .alloc_size = sizeof(struct compile_search_instance)
  268. }, {
  269. .type = "explode",
  270. .func_new2 = func_new,
  271. .func_die = func_die,
  272. .func_getvar2 = func_getvar2,
  273. .alloc_size = sizeof(struct instance)
  274. }, {
  275. .type = "compile_search::explode",
  276. .func_new2 = func_new_compiled,
  277. .func_die = func_die,
  278. .func_getvar2 = func_getvar2,
  279. .alloc_size = sizeof(struct instance)
  280. }, {
  281. .type = NULL
  282. }
  283. };
  284. const struct NCDModuleGroup ncdmodule_explode = {
  285. .modules = modules
  286. };