cstring.h 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311
  1. /**
  2. * @file cstring.h
  3. * @author Ambroz Bizjak <ambrop7@gmail.com>
  4. *
  5. * @section LICENSE
  6. *
  7. * Redistribution and use in source and binary forms, with or without
  8. * modification, are permitted provided that the following conditions are met:
  9. * 1. Redistributions of source code must retain the above copyright
  10. * notice, this list of conditions and the following disclaimer.
  11. * 2. Redistributions in binary form must reproduce the above copyright
  12. * notice, this list of conditions and the following disclaimer in the
  13. * documentation and/or other materials provided with the distribution.
  14. * 3. Neither the name of the author nor the
  15. * names of its contributors may be used to endorse or promote products
  16. * derived from this software without specific prior written permission.
  17. *
  18. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  19. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  20. * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  21. * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
  22. * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  23. * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  24. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  25. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  26. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  27. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  28. */
  29. #ifndef BADVPN_COMPOSED_STRING_H
  30. #define BADVPN_COMPOSED_STRING_H
  #include <limits.h>
  #include <stddef.h>
  #include <stdint.h>
  #include <string.h>

  #include <misc/debug.h>
  #include <misc/balloc.h>
  /* Forward declaration: the callback type below takes a pointer to this struct. */
  struct b_cstring_s;

  /**
   * Chunk-access callback through which {@link b_cstring_get} reads the storage
   * behind a cstring.
   *
   * \a cstr is the string being read; an implementation may consult its userN
   * members to recover whatever state it needs.
   * \a offset is a byte index counted from the start of the string, and satisfies
   * offset < cstr->length.
   *
   * The callback must store a length in *\a out_length and return a pointer to a
   * continuous run of bytes whose first byte is the one at index \a offset.
   * The run is allowed to extend beyond the end of the string.
   */
  typedef const char * (*b_cstring_func) (const struct b_cstring_s *cstr, size_t offset, size_t *out_length);

  /**
   * An abstract string whose bytes need not be stored continuously.
   * The size of the string is read directly from the 'length' member; the bytes
   * are obtained with {@link b_cstring_get}, which forwards to the
   * {@link b_cstring_func} callback stored in 'func'.
   */
  typedef struct b_cstring_s {
      size_t length;       /* number of bytes in the string */
      b_cstring_func func; /* chunk-access callback, see b_cstring_func */

      /* Three slots available to the callback for its own state. Each slot
         can hold a size, a data pointer or a function pointer. */
      union { size_t size; void *ptr; void (*fptr) (void); } user1;
      union { size_t size; void *ptr; void (*fptr) (void); } user2;
      union { size_t size; void *ptr; void (*fptr) (void); } user3;
  } b_cstring;
  71. /**
  72. * Makes a cstring pointing to a buffer.
  73. * \a data may be NULL if \a length is 0.
  74. */
  75. static b_cstring b_cstring_make_buf (const char *data, size_t length);
  76. /**
  77. * Makes a cstring which represents an empty string.
  78. */
  79. static b_cstring b_cstring_make_empty (void);
  80. /**
  81. * Retrieves a pointer to a continuous region of the string.
  82. * \a offset specifies the starting offset of the region to retrieve, and must be < cstr.length.
  83. * \a maxlen specifies the maximum length of the returned region, and must be > 0.
  84. * The length of the region will be stored in *\a out_chunk_len, and it will always be > 0.
  85. * It is possible that the returned region spans past the end of the string, unless limited
  86. * by \a maxlen. The pointer to the region will be returned; it will point to the byte
  87. * at offset exactly \a offset into the string.
  88. */
  89. static const char * b_cstring_get (b_cstring cstr, size_t offset, size_t maxlen, size_t *out_chunk_len);
  90. /**
  91. * Retrieves the byte in the string at position \a pos.
  92. */
  93. static char b_cstring_at (b_cstring cstr, size_t pos);
  94. /**
  95. * Asserts that the range given by \a offset and \a length is valid for the string.
  96. */
  97. static void b_cstring_assert_range (b_cstring cstr, size_t offset, size_t length);
  98. /**
  99. * Copies a range to an external buffer.
  100. */
  101. static void b_cstring_copy_to_buf (b_cstring cstr, size_t offset, size_t length, char *dest);
  102. /**
  103. * Performs a memcmp-like operation on the given ranges of two cstrings.
  104. */
  105. static int b_cstring_memcmp (b_cstring cstr1, b_cstring cstr2, size_t offset1, size_t offset2, size_t length);
  106. /**
  107. * Determines if a range within a string is equal to the bytes in an external buffer.
  108. */
  109. static int b_cstring_equals_buffer (b_cstring cstr, size_t offset, size_t length, const char *data);
  110. /**
  111. * Determines if a range within a string contains the byte \a ch.
  112. * Returns 1 if it does, and 0 if it does not. If it does contain it, and \a out_pos is not
  113. * NULL, *\a out_pos is set to the index of the first matching byte in the range.
  114. */
  115. static int b_cstring_memchr (b_cstring cstr, size_t offset, size_t length, char ch, size_t *out_pos);
  116. /**
  117. * Allocates a buffer for a range and copies it. The buffer is allocated using {@link BAlloc}.
  118. * An extra null byte will be appended. On failure, returns NULL.
  119. */
  120. static char * b_cstring_strdup (b_cstring cstr, size_t offset, size_t length);
  /**
   * Walks the continuous regions that make up a range of a cstring and runs the
   * statements in \a body once for each region, in order.
   *
   * \a cstr is the string to walk.
   * \a offset and \a length select the range; together they must describe a
   * valid range of the string.
   * \a rel_pos_var, \a chunk_data_var and \a chunk_length_var are the names of
   * the variables the macro declares for use inside \a body:
   *  - \a rel_pos_var (size_t) is the position of the current region, relative
   *    to the beginning of the range;
   *  - \a chunk_data_var (const char *) points to the first byte of the region;
   *  - \a chunk_length_var (size_t) is the length of the region.
   *
   * Note that \a cstr, \a offset and \a length are expanded inside the loop and
   * are therefore evaluated once per region; pass expressions without side effects.
   * The macro expands to a brace-enclosed block and is used without a trailing
   * semicolon.
   *
   * See the implementation of {@link b_cstring_copy_to_buf} for a usage example.
   */
  #define B_CSTRING_LOOP_RANGE(cstr, offset, length, rel_pos_var, chunk_data_var, chunk_length_var, body) \
  { \
      for (size_t rel_pos_var = 0; rel_pos_var < (length); ) { \
          size_t chunk_length_var; \
          const char *chunk_data_var = b_cstring_get((cstr), (offset) + rel_pos_var, (length) - rel_pos_var, &chunk_length_var); \
          { body } \
          rel_pos_var += chunk_length_var; \
      } \
  }
  /**
   * Convenience form of {@link B_CSTRING_LOOP_RANGE} that walks the whole
   * string: the offset is 0 and the length is (cstr).length.
   */
  #define B_CSTRING_LOOP(cstr, rel_pos_var, chunk_data_var, chunk_length_var, body) \
      B_CSTRING_LOOP_RANGE(cstr, 0, (cstr).length, rel_pos_var, chunk_data_var, chunk_length_var, body)
  150. static const char * b_cstring__buf_func (const b_cstring *cstr, size_t offset, size_t *out_length)
  151. {
  152. ASSERT(offset < cstr->length)
  153. ASSERT(out_length)
  154. ASSERT(cstr->func == b_cstring__buf_func)
  155. ASSERT(cstr->user1.ptr)
  156. *out_length = cstr->length - offset;
  157. return (const char *)cstr->user1.ptr + offset;
  158. }
  159. static b_cstring b_cstring_make_buf (const char *data, size_t length)
  160. {
  161. ASSERT(length == 0 || data)
  162. b_cstring cstr;
  163. cstr.length = length;
  164. cstr.func = b_cstring__buf_func;
  165. cstr.user1.ptr = (void *)data;
  166. return cstr;
  167. }
  168. static b_cstring b_cstring_make_empty (void)
  169. {
  170. b_cstring cstr;
  171. cstr.length = 0;
  172. cstr.func = NULL;
  173. return cstr;
  174. }
  175. static const char * b_cstring_get (b_cstring cstr, size_t offset, size_t maxlen, size_t *out_chunk_len)
  176. {
  177. ASSERT(offset < cstr.length)
  178. ASSERT(maxlen > 0)
  179. ASSERT(out_chunk_len)
  180. ASSERT(cstr.func)
  181. const char *data = cstr.func(&cstr, offset, out_chunk_len);
  182. ASSERT(data)
  183. ASSERT(*out_chunk_len > 0)
  184. if (*out_chunk_len > maxlen) {
  185. *out_chunk_len = maxlen;
  186. }
  187. return data;
  188. }
  189. static char b_cstring_at (b_cstring cstr, size_t pos)
  190. {
  191. ASSERT(pos < cstr.length)
  192. ASSERT(cstr.func)
  193. size_t chunk_len;
  194. const char *data = cstr.func(&cstr, pos, &chunk_len);
  195. ASSERT(data)
  196. ASSERT(chunk_len > 0)
  197. return *data;
  198. }
  199. static void b_cstring_assert_range (b_cstring cstr, size_t offset, size_t length)
  200. {
  201. ASSERT(offset <= cstr.length)
  202. ASSERT(length <= cstr.length - offset)
  203. }
  204. static void b_cstring_copy_to_buf (b_cstring cstr, size_t offset, size_t length, char *dest)
  205. {
  206. b_cstring_assert_range(cstr, offset, length);
  207. ASSERT(length == 0 || dest)
  208. B_CSTRING_LOOP_RANGE(cstr, offset, length, pos, chunk_data, chunk_length, {
  209. memcpy(dest + pos, chunk_data, chunk_length);
  210. })
  211. }
  212. static int b_cstring_memcmp (b_cstring cstr1, b_cstring cstr2, size_t offset1, size_t offset2, size_t length)
  213. {
  214. b_cstring_assert_range(cstr1, offset1, length);
  215. b_cstring_assert_range(cstr2, offset2, length);
  216. B_CSTRING_LOOP_RANGE(cstr1, offset1, length, pos1, chunk_data1, chunk_len1, {
  217. B_CSTRING_LOOP_RANGE(cstr2, offset2 + pos1, chunk_len1, pos2, chunk_data2, chunk_len2, {
  218. int cmp = memcmp(chunk_data1 + pos2, chunk_data2, chunk_len2);
  219. if (cmp) {
  220. return cmp;
  221. }
  222. })
  223. })
  224. return 0;
  225. }
  226. static int b_cstring_equals_buffer (b_cstring cstr, size_t offset, size_t length, const char *data)
  227. {
  228. b_cstring_assert_range(cstr, offset, length);
  229. B_CSTRING_LOOP_RANGE(cstr, offset, length, pos, chunk_data, chunk_len, {
  230. if (memcmp(chunk_data, data + pos, chunk_len)) {
  231. return 0;
  232. }
  233. })
  234. return 1;
  235. }
  236. static int b_cstring_memchr (b_cstring cstr, size_t offset, size_t length, char ch, size_t *out_pos)
  237. {
  238. b_cstring_assert_range(cstr, offset, length);
  239. B_CSTRING_LOOP_RANGE(cstr, offset, length, pos, chunk_data, chunk_length, {
  240. for (size_t i = 0; i < chunk_length; i++) {
  241. if (chunk_data[i] == ch) {
  242. if (out_pos) {
  243. *out_pos = pos + i;
  244. }
  245. return 1;
  246. }
  247. }
  248. })
  249. return 0;
  250. }
  251. static char * b_cstring_strdup (b_cstring cstr, size_t offset, size_t length)
  252. {
  253. b_cstring_assert_range(cstr, offset, length);
  254. if (length == SIZE_MAX) {
  255. return NULL;
  256. }
  257. char *buf = BAlloc(length + 1);
  258. if (buf) {
  259. b_cstring_copy_to_buf(cstr, offset, length, buf);
  260. buf[length] = '\0';
  261. }
  262. return buf;
  263. }
  264. #endif