Quellcode durchsuchen

ncd: NCDVal: initial implementation of ComposedString

ambrop7 vor 13 Jahren
Ursprung
Commit
f8e2d11d26
2 geänderte Dateien mit 513 neuen und 141 gelöschten Zeilen
  1. 350 84
      ncd/NCDVal.c
  2. 163 57
      ncd/NCDVal.h

+ 350 - 84
ncd/NCDVal.c

@@ -42,22 +42,23 @@
 
 #include <generated/blog_channel_NCDVal.h>
 
-//#define NCDVAL_TEST_EXTERNAL_STRINGS
-
 #define TYPE_MASK_EXTERNAL_TYPE ((1 << 3) - 1)
 #define TYPE_MASK_INTERNAL_TYPE ((1 << 5) - 1)
 #define TYPE_SHIFT_DEPTH 5
 
+#define STOREDSTRING_TYPE (NCDVAL_STRING | (0 << 3))
 #define IDSTRING_TYPE (NCDVAL_STRING | (1 << 3))
 #define EXTERNALSTRING_TYPE (NCDVAL_STRING | (2 << 3))
+#define COMPOSEDSTRING_TYPE (NCDVAL_STRING | (3 << 3))
 
 static int make_type (int internal_type, int depth)
 {
-    ASSERT(internal_type == NCDVAL_STRING ||
-           internal_type == NCDVAL_LIST ||
+    ASSERT(internal_type == NCDVAL_LIST ||
            internal_type == NCDVAL_MAP ||
+           internal_type == STOREDSTRING_TYPE ||
            internal_type == IDSTRING_TYPE ||
-           internal_type == EXTERNALSTRING_TYPE)
+           internal_type == EXTERNALSTRING_TYPE ||
+           internal_type == COMPOSEDSTRING_TYPE)
     ASSERT(depth >= 0)
     ASSERT(depth <= NCDVAL_MAX_DEPTH)
     
@@ -187,7 +188,7 @@ static void NCDVal__AssertValOnly (NCDValMem *mem, NCDVal__idx idx)
     ASSERT(get_depth(*type_ptr) <= NCDVAL_MAX_DEPTH)
     
     switch (get_internal_type(*type_ptr)) {
-        case NCDVAL_STRING: {
+        case STOREDSTRING_TYPE: {
             ASSERT(idx + sizeof(struct NCDVal__string) <= mem->used)
             struct NCDVal__string *str_e = NCDValMem__BufAt(mem, idx);
             ASSERT(str_e->length >= 0)
@@ -222,6 +223,13 @@ static void NCDVal__AssertValOnly (NCDValMem *mem, NCDVal__idx idx)
             ASSERT(!exs_e->ref.target || exs_e->ref.next >= -1)
             ASSERT(!exs_e->ref.target || exs_e->ref.next < mem->used)
         } break;
+        case COMPOSEDSTRING_TYPE: {
+            ASSERT(idx + sizeof(struct NCDVal__composedstring) <= mem->used)
+            struct NCDVal__composedstring *cms_e = NCDValMem__BufAt(mem, idx);
+            ASSERT(cms_e->func_getptr)
+            ASSERT(!cms_e->ref.target || cms_e->ref.next >= -1)
+            ASSERT(!cms_e->ref.target || cms_e->ref.next < mem->used)
+        } break;
         default: ASSERT(0);
     }
 #endif
@@ -422,7 +430,7 @@ NCDValRef NCDVal_NewCopy (NCDValMem *mem, NCDValRef val)
     void *ptr = NCDValMem__BufAt(val.mem, val.idx);
     
     switch (get_internal_type(*(int *)ptr)) {
-        case NCDVAL_STRING: {
+        case STOREDSTRING_TYPE: {
             struct NCDVal__string *str_e = ptr;
             
             NCDVal__idx size = sizeof(struct NCDVal__string) + str_e->length + 1;
@@ -516,6 +524,17 @@ NCDValRef NCDVal_NewCopy (NCDValMem *mem, NCDValRef val)
             return NCDVal_NewExternalString(mem, exs_e->data, exs_e->length, exs_e->ref.target);
         } break;
         
+        case COMPOSEDSTRING_TYPE: {
+            struct NCDVal__composedstring *cms_e = ptr;
+            
+            struct NCDVal_string_resource resource;
+            resource.func_getptr = cms_e->func_getptr;
+            resource.user = cms_e->user;
+            resource.ref_target = cms_e->ref.target;
+            
+            return NCDVal_NewComposedString(mem, resource, cms_e->offset, cms_e->length);
+        } break;
+        
         default: ASSERT(0);
     }
     
@@ -543,7 +562,7 @@ int NCDVal_Compare (NCDValRef val1, NCDValRef val2)
             size_t len2 = NCDVal_StringLength(val2);
             size_t min_len = len1 < len2 ? len1 : len2;
             
-            int cmp = memcmp(NCDVal_StringData(val1), NCDVal_StringData(val2), min_len);
+            int cmp = NCDVal_StringMemCmp(val1, val2, 0, 0, min_len);
             if (cmp) {
                 return (cmp > 0) - (cmp < 0);
             }
@@ -647,6 +666,31 @@ int NCDVal_IsString (NCDValRef val)
     return NCDVal_Type(val) == NCDVAL_STRING;
 }
 
+int NCDVal_IsContinuousString (NCDValRef val)
+{
+    NCDVal__AssertVal(val);
+    
+    if (val.idx < -1) {
+        return 0;
+    }
+    
+    switch (get_internal_type(*(int *)NCDValMem__BufAt(val.mem, val.idx))) {
+        case STOREDSTRING_TYPE:
+        case IDSTRING_TYPE:
+        case EXTERNALSTRING_TYPE:
+            return 1;
+        default:
+            return 0;
+    }
+}
+
+int NCDVal_IsStoredString (NCDValRef val)
+{
+    NCDVal__AssertVal(val);
+    
+    return !(val.idx < -1) && get_internal_type(*(int *)NCDValMem__BufAt(val.mem, val.idx)) == STOREDSTRING_TYPE;
+}
+
 int NCDVal_IsIdString (NCDValRef val)
 {
     NCDVal__AssertVal(val);
@@ -661,6 +705,13 @@ int NCDVal_IsExternalString (NCDValRef val)
     return !(val.idx < -1) && get_internal_type(*(int *)NCDValMem__BufAt(val.mem, val.idx)) == EXTERNALSTRING_TYPE;
 }
 
+int NCDVal_IsComposedString (NCDValRef val)
+{
+    NCDVal__AssertVal(val);
+    
+    return !(val.idx < -1) && get_internal_type(*(int *)NCDValMem__BufAt(val.mem, val.idx)) == COMPOSEDSTRING_TYPE;
+}
+
 int NCDVal_IsStringNoNulls (NCDValRef val)
 {
     NCDVal__AssertVal(val);
@@ -677,56 +728,6 @@ NCDValRef NCDVal_NewString (NCDValMem *mem, const char *data)
     return NCDVal_NewStringBin(mem, (const uint8_t *)data, strlen(data));
 }
 
-#ifdef NCDVAL_TEST_EXTERNAL_STRINGS
-
-struct test_ext_str {
-    NCDRefTarget ref_target;
-    char *data;
-};
-
-static void test_ext_str_ref_target_func_dealloc (NCDRefTarget *ref_target)
-{
-    struct test_ext_str *tes = UPPER_OBJECT(ref_target, struct test_ext_str, ref_target);
-    BFree(tes->data);
-    BFree(tes);
-}
-
-NCDValRef NCDVal_NewStringBin (NCDValMem *mem, const uint8_t *data, size_t len)
-{
-    NCDVal__AssertMem(mem);
-    ASSERT(len == 0 || data)
-    NCDVal_AssertExternal(mem, data, len);
-    
-    struct test_ext_str *tes = BAlloc(sizeof(*tes));
-    if (!tes) {
-        goto fail0;
-    }
-    
-    tes->data = BAlloc(len);
-    if (!tes->data) {
-        goto fail1;
-    }
-    
-    if (len > 0) {
-        memcpy(tes->data, data, len);
-    }
-    
-    NCDRefTarget_Init(&tes->ref_target, test_ext_str_ref_target_func_dealloc);
-    
-    NCDValRef res = NCDVal_NewExternalString(mem, tes->data, len, &tes->ref_target);
-    NCDRefTarget_Deref(&tes->ref_target);
-    return res;
-    
-fail1:
-    BFree(tes);
-fail0:
-    return NCDVal_NewInvalid();
-}
-
-#endif
-
-#ifndef NCDVAL_TEST_EXTERNAL_STRINGS
-
 NCDValRef NCDVal_NewStringBin (NCDValMem *mem, const uint8_t *data, size_t len)
 {
     NCDVal__AssertMem(mem);
@@ -744,7 +745,7 @@ NCDValRef NCDVal_NewStringBin (NCDValMem *mem, const uint8_t *data, size_t len)
     }
     
     struct NCDVal__string *str_e = NCDValMem__BufAt(mem, idx);
-    str_e->type = make_type(NCDVAL_STRING, 0);
+    str_e->type = make_type(STOREDSTRING_TYPE, 0);
     str_e->length = len;
     if (len > 0) {
         memcpy(str_e->data, data, len);
@@ -757,8 +758,6 @@ fail:
     return NCDVal_NewInvalid();
 }
 
-#endif
-
 NCDValRef NCDVal_NewStringUninitialized (NCDValMem *mem, size_t len)
 {
     NCDVal__AssertMem(mem);
@@ -774,7 +773,7 @@ NCDValRef NCDVal_NewStringUninitialized (NCDValMem *mem, size_t len)
     }
     
     struct NCDVal__string *str_e = NCDValMem__BufAt(mem, idx);
-    str_e->type = make_type(NCDVAL_STRING, 0);
+    str_e->type = make_type(STOREDSTRING_TYPE, 0);
     str_e->length = len;
     str_e->data[len] = '\0';
     
@@ -843,13 +842,54 @@ fail:
     return NCDVal_NewInvalid();
 }
 
-const char * NCDVal_StringData (NCDValRef string)
+NCDValRef NCDVal_NewComposedString (NCDValMem *mem, struct NCDVal_string_resource resource, size_t offset, size_t length)
 {
-    ASSERT(NCDVal_IsString(string))
+    NCDVal__AssertMem(mem);
+    ASSERT(resource.func_getptr)
     
-    void *ptr = NCDValMem__BufAt(string.mem, string.idx);
+    NCDVal__idx size = sizeof(struct NCDVal__composedstring);
+    NCDVal__idx idx = NCDValMem__Alloc(mem, size, __alignof(struct NCDVal__composedstring));
+    if (idx < 0) {
+        goto fail;
+    }
+    
+    if (resource.ref_target) {
+        if (!NCDRefTarget_Ref(resource.ref_target)) {
+            goto fail;
+        }
+    }
+    
+    struct NCDVal__composedstring *cms_e = NCDValMem__BufAt(mem, idx);
+    cms_e->type = make_type(COMPOSEDSTRING_TYPE, 0);
+    cms_e->offset = offset;
+    cms_e->length = length;
+    cms_e->func_getptr = resource.func_getptr;
+    cms_e->user = resource.user;
+    cms_e->ref.target = resource.ref_target;
+    
+    if (resource.ref_target) {
+        cms_e->ref.next = mem->first_ref;
+        mem->first_ref = idx + offsetof(struct NCDVal__composedstring, ref);
+    }
+    
+    return NCDVal__Ref(mem, idx);
+    
+fail:
+    return NCDVal_NewInvalid();
+}
+
+const char * NCDVal_StringData (NCDValRef contstring)
+{
+    ASSERT(NCDVal_IsContinuousString(contstring))
+    
+    void *ptr = NCDValMem__BufAt(contstring.mem, contstring.idx);
     
     switch (get_internal_type(*(int *)ptr)) {
+        case STOREDSTRING_TYPE: {
+            struct NCDVal__string *str_e = ptr;
+            return str_e->data;
+        } break;
+        
         case IDSTRING_TYPE: {
             struct NCDVal__idstring *ids_e = ptr;
             const char *value = NCDStringIndex_Value(ids_e->string_index, ids_e->string_id);
@@ -860,10 +900,11 @@ const char * NCDVal_StringData (NCDValRef string)
             struct NCDVal__externalstring *exs_e = ptr;
             return exs_e->data;
         } break;
+        
+        default:
+            ASSERT(0);
+            return NULL;
     }
-    
-    struct NCDVal__string *str_e = ptr;
-    return str_e->data;
 }
 
 size_t NCDVal_StringLength (NCDValRef string)
@@ -873,6 +914,11 @@ size_t NCDVal_StringLength (NCDValRef string)
     void *ptr = NCDValMem__BufAt(string.mem, string.idx);
     
     switch (get_internal_type(*(int *)ptr)) {
+        case STOREDSTRING_TYPE: {
+            struct NCDVal__string *str_e = ptr;
+            return str_e->length;
+        } break;
+        
         case IDSTRING_TYPE: {
             struct NCDVal__idstring *ids_e = ptr;
             return NCDStringIndex_Length(ids_e->string_index, ids_e->string_id);
@@ -882,10 +928,62 @@ size_t NCDVal_StringLength (NCDValRef string)
             struct NCDVal__externalstring *exs_e = ptr;
             return exs_e->length;
         } break;
+        
+        case COMPOSEDSTRING_TYPE: {
+            struct NCDVal__composedstring *cms_e = ptr;
+            return cms_e->length;
+        } break;
+        
+        default:
+            ASSERT(0);
+            return 0;
+    }
+}
+
+void NCDVal_StringGetPtr (NCDValRef string, size_t offset, size_t max_length, const char **out_data, size_t *out_length)
+{
+    ASSERT(NCDVal_IsString(string))
+    ASSERT(offset <= NCDVal_StringLength(string))
+    ASSERT(out_data)
+    ASSERT(out_length)
+    
+    void *ptr = NCDValMem__BufAt(string.mem, string.idx);
+    
+    switch (get_internal_type(*(int *)ptr)) {
+        case STOREDSTRING_TYPE: {
+            struct NCDVal__string *str_e = ptr;
+            *out_data = str_e->data + offset;
+            *out_length = str_e->length - offset;
+        } break;
+        
+        case IDSTRING_TYPE: {
+            struct NCDVal__idstring *ids_e = ptr;
+            *out_data = NCDStringIndex_Value(ids_e->string_index, ids_e->string_id) + offset;
+            *out_length = NCDStringIndex_Length(ids_e->string_index, ids_e->string_id) - offset;
+        } break;
+        
+        case EXTERNALSTRING_TYPE: {
+            struct NCDVal__externalstring *exs_e = ptr;
+            *out_data = exs_e->data + offset;
+            *out_length = exs_e->length - offset;
+        } break;
+        
+        case COMPOSEDSTRING_TYPE: {
+            struct NCDVal__composedstring *cms_e = ptr;
+            cms_e->func_getptr(cms_e->user, cms_e->offset + offset, out_data, out_length);
+            ASSERT(*out_data)
+            ASSERT(offset == cms_e->length || *out_length > 0)
+        } break;
+        
+        default:
+            ASSERT(0);
+            *out_data = NULL;
+            *out_length = 0;
     }
     
-    struct NCDVal__string *str_e = ptr;;
-    return str_e->length;
+    if (*out_length > max_length) {
+        *out_length = max_length;
+    }
 }
 
 int NCDVal_StringNullTerminate (NCDValRef string, NCDValNullTermString *out)
@@ -896,6 +994,13 @@ int NCDVal_StringNullTerminate (NCDValRef string, NCDValNullTermString *out)
     void *ptr = NCDValMem__BufAt(string.mem, string.idx);
     
     switch (get_internal_type(*(int *)ptr)) {
+        case STOREDSTRING_TYPE: {
+            struct NCDVal__string *str_e = ptr;
+            out->data = str_e->data;
+            out->is_allocated = 0;
+            return 1;
+        } break;
+        
         case IDSTRING_TYPE: {
             struct NCDVal__idstring *ids_e = ptr;
             out->data = (char *)NCDStringIndex_Value(ids_e->string_index, ids_e->string_id);
@@ -915,12 +1020,32 @@ int NCDVal_StringNullTerminate (NCDValRef string, NCDValNullTermString *out)
             out->is_allocated = 1;
             return 1;
         } break;
+        
+        case COMPOSEDSTRING_TYPE: {
+            struct NCDVal__composedstring *cms_e = ptr;
+            size_t length = cms_e->length;
+            
+            if (length == SIZE_MAX) {
+                return 0;
+            }
+            
+            char *copy = BAlloc(length + 1);
+            if (!copy) {
+                return 0;
+            }
+            
+            NCDVal_StringCopyOut(string, 0, length, copy);
+            copy[length] = '\0';
+            
+            out->data = copy;
+            out->is_allocated = 1;
+            return 1;
+        } break;
+        
+        default:
+            ASSERT(0);
+            return 0;
     }
-    
-    struct NCDVal__string *str_e = ptr;
-    out->data = str_e->data;
-    out->is_allocated = 0;
-    return 1;
 }
 
 NCDValNullTermString NCDValNullTermString_NewDummy (void)
@@ -974,6 +1099,29 @@ NCDRefTarget * NCDVal_ExternalStringTarget (NCDValRef externalstring)
     return exs_e->ref.target;
 }
 
+struct NCDVal_string_resource NCDVal_ComposedStringResource (NCDValRef composedstring)
+{
+    ASSERT(NCDVal_IsComposedString(composedstring))
+    
+    struct NCDVal__composedstring *cms_e = NCDValMem__BufAt(composedstring.mem, composedstring.idx);
+    
+    struct NCDVal_string_resource res;
+    res.func_getptr = cms_e->func_getptr;
+    res.user = cms_e->user;
+    res.ref_target = cms_e->ref.target;
+    
+    return res;
+}
+
+size_t NCDVal_ComposedStringOffset (NCDValRef composedstring)
+{
+    ASSERT(NCDVal_IsComposedString(composedstring))
+    
+    struct NCDVal__composedstring *cms_e = NCDValMem__BufAt(composedstring.mem, composedstring.idx);
+    
+    return cms_e->offset;
+}
+
 int NCDVal_StringHasNulls (NCDValRef string)
 {
     ASSERT(NCDVal_IsString(string))
@@ -986,11 +1134,31 @@ int NCDVal_StringHasNulls (NCDValRef string)
             return NCDStringIndex_HasNulls(ids_e->string_index, ids_e->string_id);
         } break;
         
-        default: {
+        case STOREDSTRING_TYPE:
+        case EXTERNALSTRING_TYPE: {
             const char *data = NCDVal_StringData(string);
             size_t length = NCDVal_StringLength(string);
             return !!memchr(data, '\0', length);
         } break;
+        
+        case COMPOSEDSTRING_TYPE: {
+            size_t pos = 0;
+            size_t length = NCDVal_StringLength(string);
+            while (pos < length) {
+                const char *chunk_data;
+                size_t chunk_len;
+                NCDVal_StringGetPtr(string, pos, length - pos, &chunk_data, &chunk_len);
+                if (memchr(chunk_data, '\0', chunk_len)) {
+                    return 1;
+                }
+                pos += chunk_len;
+            }
+            return 0;
+        } break;
+        
+        default:
+            ASSERT(0);
+            return 0;
     }
 }
 
@@ -999,9 +1167,9 @@ int NCDVal_StringEquals (NCDValRef string, const char *data)
     ASSERT(NCDVal_IsString(string))
     ASSERT(data)
     
-    size_t len = strlen(data);
+    size_t data_len = strlen(data);
     
-    return NCDVal_StringLength(string) == len && !memcmp(NCDVal_StringData(string), data, len);
+    return NCDVal_StringLength(string) == data_len && NCDVal_StringRegionEquals(string, 0, data_len, data);
 }
 
 int NCDVal_StringEqualsId (NCDValRef string, NCD_string_id_t string_id,
@@ -1014,6 +1182,13 @@ int NCDVal_StringEqualsId (NCDValRef string, NCD_string_id_t string_id,
     void *ptr = NCDValMem__BufAt(string.mem, string.idx);
     
     switch (get_internal_type(*(int *)ptr)) {
+        case STOREDSTRING_TYPE: {
+            struct NCDVal__string *str_e = ptr;
+            const char *string_data = NCDStringIndex_Value(string_index, string_id);
+            size_t string_length = NCDStringIndex_Length(string_index, string_id);
+            return (string_length == str_e->length) && !memcmp(string_data, str_e->data, string_length);
+        } break;
+        
         case IDSTRING_TYPE: {
             struct NCDVal__idstring *ids_e = ptr;
             ASSERT(ids_e->string_index == string_index)
@@ -1026,12 +1201,102 @@ int NCDVal_StringEqualsId (NCDValRef string, NCD_string_id_t string_id,
             size_t string_length = NCDStringIndex_Length(string_index, string_id);
             return (string_length == exs_e->length) && !memcmp(string_data, exs_e->data, string_length);
         } break;
+        
+        case COMPOSEDSTRING_TYPE: {
+            struct NCDVal__composedstring *cms_e = ptr;
+            const char *string_data = NCDStringIndex_Value(string_index, string_id);
+            size_t string_length = NCDStringIndex_Length(string_index, string_id);
+            return (string_length == cms_e->length) && NCDVal_StringRegionEquals(string, 0, string_length, string_data);
+        } break;
+        
+        default:
+            ASSERT(0);
+            return 0;
+    }
+}
+
+int NCDVal_StringMemCmp (NCDValRef string1, NCDValRef string2, size_t start1, size_t start2, size_t length)
+{
+    ASSERT(NCDVal_IsString(string1))
+    ASSERT(NCDVal_IsString(string2))
+    ASSERT(start1 <= NCDVal_StringLength(string1))
+    ASSERT(start2 <= NCDVal_StringLength(string2))
+    ASSERT(length <= NCDVal_StringLength(string1) - start1)
+    ASSERT(length <= NCDVal_StringLength(string2) - start2)
+    
+    if (NCDVal_IsContinuousString(string1) && NCDVal_IsContinuousString(string2)) {
+        return memcmp(NCDVal_StringData(string1) + start1, NCDVal_StringData(string2) + start2, length);
     }
     
-    struct NCDVal__string *str_e = ptr;
-    const char *string_data = NCDStringIndex_Value(string_index, string_id);
-    size_t string_length = NCDStringIndex_Length(string_index, string_id);
-    return (string_length == str_e->length) && !memcmp(string_data, str_e->data, string_length);
+    size_t pos1 = 0;
+    while (pos1 < length) {
+        const char *chunk_data1;
+        size_t chunk_len1;
+        NCDVal_StringGetPtr(string1, start1 + pos1, length - pos1, &chunk_data1, &chunk_len1);
+        
+        size_t pos2 = 0;
+        while (pos2 < chunk_len1) {
+            const char *chunk_data2;
+            size_t chunk_len2;
+            NCDVal_StringGetPtr(string2, start2 + pos1 + pos2, chunk_len1 - pos2, &chunk_data2, &chunk_len2);
+            
+            int cmp = memcmp(chunk_data1 + pos2, chunk_data2, chunk_len2);
+            if (cmp) {
+                return cmp;
+            }
+            
+            pos2 += chunk_len2;
+        }
+        
+        pos1 += chunk_len1;
+    }
+    
+    return 0;
+}
+
+void NCDVal_StringCopyOut (NCDValRef string, size_t start, size_t length, char *dst)
+{
+    ASSERT(NCDVal_IsString(string))
+    ASSERT(start <= NCDVal_StringLength(string))
+    ASSERT(length <= NCDVal_StringLength(string) - start)
+    
+    if (NCDVal_IsContinuousString(string)) {
+        memcpy(dst, NCDVal_StringData(string) + start, length);
+        return;
+    }
+    
+    size_t pos = 0;
+    while (pos < length) {
+        const char *chunk_data;
+        size_t chunk_len;
+        NCDVal_StringGetPtr(string, start + pos, length - pos, &chunk_data, &chunk_len);
+        memcpy(dst + pos, chunk_data, chunk_len);
+        pos += chunk_len;
+    }
+}
+
+int NCDVal_StringRegionEquals (NCDValRef string, size_t start, size_t length, const char *data)
+{
+    ASSERT(NCDVal_IsString(string))
+    ASSERT(start <= NCDVal_StringLength(string))
+    ASSERT(length <= NCDVal_StringLength(string) - start)
+    
+    if (NCDVal_IsContinuousString(string)) {
+        return !memcmp(NCDVal_StringData(string) + start, data, length);
+    }
+    
+    size_t pos = 0;
+    while (pos < length) {
+        const char *chunk_data;
+        size_t chunk_len;
+        NCDVal_StringGetPtr(string, start + pos, length - pos, &chunk_data, &chunk_len);
+        if (memcmp(chunk_data, data + pos, chunk_len)) {
+            return 0;
+        }
+        pos += chunk_len;
+    }
+    
+    return 1;
 }
 
 int NCDVal_IsList (NCDValRef val)
@@ -1392,9 +1657,10 @@ static void replaceprog_build_recurser (NCDValMem *mem, NCDVal__idx idx, size_t
     struct NCDVal__instr instr;
     
     switch (get_internal_type(*((int *)(ptr)))) {
-        case NCDVAL_STRING:
+        case STOREDSTRING_TYPE:
         case IDSTRING_TYPE:
-        case EXTERNALSTRING_TYPE: {
+        case EXTERNALSTRING_TYPE:
+        case COMPOSEDSTRING_TYPE: {
         } break;
         
         case NCDVAL_LIST: {

+ 163 - 57
ncd/NCDVal.h

@@ -88,6 +88,15 @@ struct NCDVal__externalstring {
     struct NCDVal__ref ref;
 };
 
+struct NCDVal__composedstring {
+    int type;
+    size_t offset;
+    size_t length;
+    void (*func_getptr) (void *, size_t, const char **, size_t *);
+    void *user;
+    struct NCDVal__ref ref;
+};
+
 typedef struct {
     char *buf;
     NCDVal__idx size;
@@ -296,110 +305,186 @@ NCDValRef NCDVal_FromSafe (NCDValMem *mem, NCDValSafeRef sval);
 NCDValRef NCDVal_Moved (NCDValMem *mem, NCDValRef val);
 
 /**
- * Determines if a value is a string value.
+ * Determines if the value implements the String interface.
  * The value reference must not be an invalid reference.
  */
 int NCDVal_IsString (NCDValRef val);
 
 /**
- * Determines if a value is an ID-string value. See {@link NCDVal_NewIdString}
- * for an explanation of ID-string values.
+ * Determines if the value implements the ContinuousString interface.
+ * A ContinuousString also implements the String interface.
+ * The value reference must not be an invalid reference.
+ */
+int NCDVal_IsContinuousString (NCDValRef val);
+
+/**
+ * Determines if the value is a StoredString.
+ * A StoredString implements the ContinuousString interface.
+ * The value reference must not be an invalid reference.
+ */
+int NCDVal_IsStoredString (NCDValRef val);
+
+/**
+ * Determines if the value is an IdString. See {@link NCDVal_NewIdString}
+ * for details.
+ * An IdString implements the ContinuousString interface.
  * The value reference must not be an invalid reference.
  */
 int NCDVal_IsIdString (NCDValRef val);
 
 /**
- * Determines if a value is an external string value.
- * See {@link NCDVal_NewExternalString} for an explanation of external
- * string values.
+ * Determines if a value is an ExternalString.
+ * See {@link NCDVal_NewExternalString} for details.
+ * An ExternalString implements the ContinuousString interface.
  * The value reference must not be an invalid reference.
  */
 int NCDVal_IsExternalString (NCDValRef val);
 
 /**
- * Determines if a value is a string value which has no null bytes.
+ * Determines if a value is a ComposedString.
+ * A ComposedString implements the String interface.
+ */
+int NCDVal_IsComposedString (NCDValRef val);
+
+/**
+ * Determines if a value is a String which contains no null bytes.
  * The value reference must not be an invalid reference.
  */
 int NCDVal_IsStringNoNulls (NCDValRef val);
 
 /**
- * Builds a new string value from a null-terminated array of bytes.
  * Equivalent to NCDVal_NewStringBin(mem, data, strlen(data)).
- * Returns a reference to the new value, or an invalid reference
- * on out of memory.
- * WARNING: The buffer passed must NOT be part of any value in the
- * memory object specified. In particular, you may NOT use this
- * function to copy a string that resides in the same memory object.
  */
 NCDValRef NCDVal_NewString (NCDValMem *mem, const char *data);
 
 /**
- * Builds a new string value.
+ * Builds a new StoredString.
  * Returns a reference to the new value, or an invalid reference
  * on out of memory.
  * WARNING: The buffer passed must NOT be part of any value in the
  * memory object specified. In particular, you may NOT use this
  * function to copy a string that resides in the same memory object.
+ * 
+ * A StoredString is a kind of ContinuousString which is represented directly in the
+ * value memory object.
  */
 NCDValRef NCDVal_NewStringBin (NCDValMem *mem, const uint8_t *data, size_t len);
 
 /**
- * Builds a new string value of the given length with undefined contents.
+ * Builds a new StoredString of the given length with undefined contents.
  * You can define the contents of the string later by copying to the address
  * returned by {@link NCDVal_StringData}.
  */
 NCDValRef NCDVal_NewStringUninitialized (NCDValMem *mem, size_t len);
 
 /**
- * Builds a new ID-string value.
+ * Builds a new IdString.
  * Returns a reference to the new value, or an invalid reference
  * on out of memory.
  * 
- * An ID-string value is a special kind of string value which is represented
- * efficiently as a string identifier via {@link NCDStringIndex}. An ID-string
- * is also a string and is transparent for use. For example, for an ID-string,
- * {@link NCDVal_Type} still returns NCDVAL_STRING, {@link NCDVal_IsString}
- * returns 1, and {@link NCDVal_StringData} and {@link NCDVal_StringLength}
- * both work. The only way to distinguish an ID-string from a non-ID string is
- * by calling {@link NCDVal_IsIdString}.
+ * An IdString is a kind of ContinuousString which is represented efficiently as a string
+ * identifier via {@link NCDStringIndex}.
  */
 NCDValRef NCDVal_NewIdString (NCDValMem *mem, NCD_string_id_t string_id,
                               NCDStringIndex *string_index);
 
 /**
- * Builds a new string value pointing to the given external data. A reference to
+ * Builds a new ExternalString, pointing to the given external data. A reference to
  * the external data is taken using {@link NCDRefTarget}, unless 'ref_target' is
- * NULL. The data must not change while this value exists. Like ID-strings,
- * external strings are transparent for use. An external string can be recognized
- * using {@link NCDVal_IsExternalString}.
- * 
+ * NULL. The data must not change while this value exists.
  * Returns a reference to the new value, or an invalid reference
  * on out of memory.
+ * 
+ * An ExternalString is a kind of ContinuousString where the actual string contents are
+ * stored outside of the value memory object.
  */
 NCDValRef NCDVal_NewExternalString (NCDValMem *mem, const char *data, size_t len,
                                     NCDRefTarget *ref_target);
 
 /**
- * Returns a pointer to the data of a string value.
+ * Callback function which is called by {@link NCDVal_StringGetPtr} for ComposedStrings to
+ * access the underlying string resource.
+ * \a user is whatever was passed to 'resource.user' in {@link NCDVal_NewComposedString}.
+ * \a offset is the offset from the beginning of the string exposed by the resource; it will be
+ * >= 'offset' and <= 'offset' + 'length' as given to NCDVal_NewComposedString.
+ * This callback must set *\a out_data and *\a out_length to represent a continuous (sub-)region
+ * of the string that starts at the byte at index \a offset. The pointed-to data must remain
+ * valid and unchanged until all references to the string resource are released.
+ * *\a out_data must be set to non-NULL even if there is no more data in the resource.
+ */
+typedef void (*NCDVal_ComposedString_func_getptr) (void *user, size_t offset, const char **out_data, size_t *out_length);
+
+/**
+ * Structure representing a string resource used by ComposedStrings,
+ * to simplify {@link NCDVal_NewComposedString} and {@link NCDVal_ComposedStringResource}.
+ */
+struct NCDVal_string_resource {
+    NCDVal_ComposedString_func_getptr func_getptr;
+    void *user;
+    NCDRefTarget *ref_target;
+};
+
+/**
+ * Builds a new ComposedString from a string resource.
+ * A reference to the underlying string resource is taken via the {@link NCDRefTarget}
+ * object specified in 'resource.ref_target', unless that is NULL.
+ * 
+ * A ComposedString is a kind of String with an abstract representation exposed via the
+ * {@link NCDVal_ComposedString_func_getptr} callback.
+ */
+NCDValRef NCDVal_NewComposedString (NCDValMem *mem, struct NCDVal_string_resource resource, size_t offset, size_t length);
+
+/**
+ * Returns a pointer to the data of a ContinuousString.
  * WARNING: the string data may not be null-terminated. To get a null-terminated
  * version, use {@link NCDVal_StringNullTerminate}.
- * The value reference must point to a string value.
+ * The value reference must point to a ContinuousString.
  */
-const char * NCDVal_StringData (NCDValRef string);
+const char * NCDVal_StringData (NCDValRef contstring);
 
 /**
- * Returns the length of the string value.
- * The value reference must point to a string value.
+ * Returns the length of a String.
+ * The value reference must point to a String.
  */
 size_t NCDVal_StringLength (NCDValRef string);
 
 /**
- * Produces a null-terminated version of a string value. On success, the result is
- * stored into a {@link NCDValNullTermString} structure, and the null-terminated
+ * Returns a pointer into a continuous chunk of data within a String.
+ * The \a offset must be less than or equal to the length of the string.
+ * Both \a out_data and \a out_length must be non-NULL. *\a out_data will be set to point
+ * into a continuous data chunk starting at \a offset from the beginning of the string, and
+ * *\a out_length will be set to the number of bytes which are available from that pointer,
+ * and to no more than \a max_length.
+ * 
+ * It is only guaranteed that:
+ * - if offset < length_of_string and max_length > 0, then *out_length > 0,
+ * - *out_length <= max_length.
+ * 
+ * This means that:
+ * - *out_length may be smaller than the remainder of the string,
+ * - *out_length may be larger than length_of_string - offset, unless limited by max_length.
+ * 
+ * For clarification, the following code is provided which prints the entire string
+ * to standard output.
+ * 
+ * size_t pos = 0;
+ * size_t length = NCDVal_StringLength(string);
+ * while (pos < length) {
+ *     const char *chunk_data;
+ *     size_t chunk_len;
+ *     NCDVal_StringGetPtr(string, pos, length - pos, &chunk_data, &chunk_len);
+ *     fwrite(chunk_data, 1, chunk_len, stdout);
+ *     pos += chunk_len;
+ * }
+ */
+void NCDVal_StringGetPtr (NCDValRef string, size_t offset, size_t max_length, const char **out_data, size_t *out_length);
+
+/**
+ * Produces a null-terminated continuous version of a String. On success, the result is
+ * stored into an {@link NCDValNullTermString} structure, and the null-terminated
  * string is available via its 'data' member. This function may either simply pass
- * through the data pointer as returned by {@link NCDVal_StringData} (if the string
- * is known to be null-terminated) or produce a null-terminated dynamically allocated
- * copy.
+ * through the data pointer (if the string is known to be continuous and null-terminated) or
+ * produce a null-terminated dynamically allocated copy.
  * On success, {@link NCDValNullTermString_Free} should be called to release any allocated
  * memory when the null-terminated string is no longer needed. This must be called before
  * the memory object is freed, because it may point to data inside the memory object.
@@ -421,59 +506,80 @@ NCDValNullTermString NCDValNullTermString_NewDummy (void);
 void NCDValNullTermString_Free (NCDValNullTermString *o);
 
 /**
- * Returns the string ID and the string index of an ID-string.
- * The value given must be an ID-string value (which can be determined via
- * {@link NCDVal_IsIdString}). Both the \a out_string_id and \a out_string_index
- * pointers must be non-NULL.
+ * Returns the string ID and the string index of an IdString.
+ * Both the \a out_string_id and \a out_string_index pointers must be non-NULL.
  */
 void NCDVal_IdStringGet (NCDValRef idstring, NCD_string_id_t *out_string_id,
                          NCDStringIndex **out_string_index);
 
 /**
- * Returns the string ID of an ID-string.
- * The value given must be an ID-string value (which can be determined via
- * {@link NCDVal_IsIdString}).
+ * Returns the string ID of an IdString.
  */
 NCD_string_id_t NCDVal_IdStringId (NCDValRef idstring);
 
 /**
- * Returns the string index of an ID-string.
- * The value given must be an ID-string value (which can be determined via
- * {@link NCDVal_IsIdString}).
+ * Returns the string index of an IdString.
  */
 NCDStringIndex * NCDVal_IdStringStringIndex (NCDValRef idstring);
 
 /**
- * Returns the reference target of an external string. This may be NULL
+ * Returns the reference target of an ExternalString. This may be NULL
  * if the external string is not associated with a reference target.
- * The value given must be an external string value (which can be determined
- * via {@link NCDVal_IsExternalString}).
  */
 NCDRefTarget * NCDVal_ExternalStringTarget (NCDValRef externalstring);
 
 /**
- * Determines if the string value has any null bytes in its contents.
- * The value reference must point to a string value.
+ * Returns the underlying string resource of a ComposedString.
+ */
+struct NCDVal_string_resource NCDVal_ComposedStringResource (NCDValRef composedstring);
+
+/**
+ * Returns the resource offset of a ComposedString.
+ */
+size_t NCDVal_ComposedStringOffset (NCDValRef composedstring);
+
+/**
+ * Determines if the String has any null bytes in its contents.
  */
 int NCDVal_StringHasNulls (NCDValRef string);
 
 /**
- * Determines if the string value is equal to the given null-terminated
+ * Determines if the String value is equal to the given null-terminated
  * string.
- * The value reference must point to a string value.
+ * The value reference must point to a String value.
  */
 int NCDVal_StringEquals (NCDValRef string, const char *data);
 
 /**
- * Determines if the string value is equal to the given string represented
+ * Determines if the String is equal to the given string represented
  * by an {@link NCDStringIndex} identifier.
- * The value reference must point to a string value.
  * NOTE: \a string_index must be equal to the string_index of every ID-string
  * that exist within this memory object.
  */
 int NCDVal_StringEqualsId (NCDValRef string, NCD_string_id_t string_id,
                            NCDStringIndex *string_index);
 
+/**
+ * Compares two Strings in a manner similar to memcmp().
+ * The startN and length arguments must refer to a valid region within
+ * stringN, i.e. startN + length <= length_of_stringN must hold.
+ */
+int NCDVal_StringMemCmp (NCDValRef string1, NCDValRef string2, size_t start1, size_t start2, size_t length);
+
+/**
+ * Copies a part of a String to a buffer.
+ * \a start and \a length must refer to a valid region within the string,
+ * i.e. start + length <= length_of_string must hold.
+ */
+void NCDVal_StringCopyOut (NCDValRef string, size_t start, size_t length, char *dst);
+
+/**
+ * Determines if a part of a String is equal to the \a length bytes in \a data.
+ * \a start and \a length must refer to a valid region within the string,
+ * i.e. start + length <= length_of_string must hold.
+ */
+int NCDVal_StringRegionEquals (NCDValRef string, size_t start, size_t length, const char *data);
+
 /**
  * Determines if a value is a list value.
  * The value reference must not be an invalid reference.