add support for line continuation in the properties parser - resolves #457

Wed, 10 Dec 2025 13:12:27 +0100

author
Olaf Wintermann <olaf.wintermann@gmail.com>
date
Wed, 10 Dec 2025 13:12:27 +0100
changeset 1562
f2b63cad2142
parent 1561
fcebf53de51c
child 1563
6e2f83ef6f5a

add support for line continuation in the properties parser - resolves #457

src/cx/properties.h file | annotate | diff | comparison | revisions
src/properties.c file | annotate | diff | comparison | revisions
tests/test_properties.c file | annotate | diff | comparison | revisions
--- a/src/cx/properties.h	Tue Dec 09 19:05:35 2025 +0100
+++ b/src/cx/properties.h	Wed Dec 10 13:12:27 2025 +0100
@@ -77,9 +77,6 @@
      * The character, when appearing at the end of a line, continues that line.
      * This is '\' by default.
      */
-    /**
-     * Reserved for future use.
-     */
     char continuation;
 };
 
--- a/src/properties.c	Tue Dec 09 19:05:35 2025 +0100
+++ b/src/properties.c	Wed Dec 10 13:12:27 2025 +0100
@@ -31,12 +31,13 @@
 #include <assert.h>
 #include <stdio.h>
 #include <string.h>
+#include <ctype.h>
 
 const CxPropertiesConfig cx_properties_config_default = {
-        '=',
-        '#',
-        '\0',
-        '\0',
+    '=',
+    '#',
+    '\0',
+    '\0',
     '\\',
 };
 
@@ -96,13 +97,30 @@
 
     // a pointer to the buffer we want to read from
     CxBuffer *current_buffer = &prop->input;
-
+    
+    char comment1 = prop->config.comment1;
+    char comment2 = prop->config.comment2;
+    char comment3 = prop->config.comment3;
+    char delimiter = prop->config.delimiter;
+    char continuation = prop->config.continuation;
+    
     // check if we have rescued data
     if (!cxBufferEof(&prop->buffer)) {
         // check if we can now get a complete line
         cxstring input = cx_strn(prop->input.space + prop->input.pos,
             prop->input.size - prop->input.pos);
         cxstring nl = cx_strchr(input, '\n');
+        while (nl.length > 0) {
+            // check for line continuation
+            char previous = nl.ptr > input.ptr ? nl.ptr[-1] : prop->buffer.space[prop->buffer.size-1];
+            if (previous == continuation) {
+                // this nl is a line continuation, check the next newline
+                nl = cx_strchr(cx_strsubs(nl, 1), '\n');
+            } else {
+                break;
+            }
+        }
+        
         if (nl.length > 0) {
             // we add as much data to the rescue buffer as we need
             // to complete the line
@@ -129,12 +147,7 @@
             return CX_PROPERTIES_INCOMPLETE_DATA;
         }
     }
-
-    char comment1 = prop->config.comment1;
-    char comment2 = prop->config.comment2;
-    char comment3 = prop->config.comment3;
-    char delimiter = prop->config.delimiter;
-
+   
     // get one line and parse it
     while (!cxBufferEof(current_buffer)) {
         const char *buf = current_buffer->space + current_buffer->pos;
@@ -147,6 +160,7 @@
         size_t delimiter_index = 0;
         size_t comment_index = 0;
         bool has_comment = false;
+        bool has_continuation = false;
 
         size_t i = 0;
         char c = 0;
@@ -161,6 +175,9 @@
                 if (delimiter_index == 0 && !has_comment) {
                     delimiter_index = i;
                 }
+            } else if (delimiter_index > 0 && c == continuation && i+1 < len && buf[i+1] == '\n') {
+                has_continuation = true;
+                i++;
             } else if (c == '\n') {
                 break;
             }
@@ -225,10 +242,53 @@
             k = cx_strtrim(k);
             val = cx_strtrim(val);
             if (k.length > 0) {
+                current_buffer->pos += i + 1; 
+                assert(current_buffer->pos <= current_buffer->size);
+                assert(current_buffer != &prop->buffer || current_buffer->pos == current_buffer->size);
+                
+                if (has_continuation) {
+                    char *ptr = (char*)val.ptr;
+                    if (current_buffer != &prop->buffer) {
+                        // move value to the rescue buffer
+                        if (prop->buffer.space == NULL) {
+                            cxBufferInit(&prop->buffer, NULL, 256, NULL, CX_BUFFER_AUTO_EXTEND);
+                        }
+                        prop->buffer.size = 0;
+                        prop->buffer.pos = 0;
+                        if (cxBufferWrite(val.ptr, 1, val.length, &prop->buffer) != val.length) {
+                            return CX_PROPERTIES_BUFFER_ALLOC_FAILED;
+                        }
+                        val.ptr = prop->buffer.space;
+                        ptr = prop->buffer.space;
+                    }
+                    // val.ptr now points into the rescue buffer, so the continuation
+                    // characters and newlines can be removed from the value in place
+                    bool trim = false;
+                    size_t x = 0;
+                    for(size_t j=0;j<val.length;j++) {
+                        c = ptr[j];
+                        if (j+1 < val.length && c == '\\' && ptr[j+1] == '\n') {
+                            // skip continuation and newline character
+                            j++;
+                            trim = true; // enable trim in the next line
+                            continue;
+                        }
+                        if (j > x) {
+                            if (trim) {
+                                if (isspace((unsigned char)c)) {
+                                    continue;
+                                }
+                                trim = false;
+                            }
+                            ptr[x] = c;
+                        }
+                        x++;
+                    }
+                    val.length = x;
+                }
                 *key = k;
                 *value = val;
-                current_buffer->pos += i + 1;
-                assert(current_buffer->pos <= current_buffer->size);
+                
                 return CX_PROPERTIES_NO_ERROR;
             } else {
                 return CX_PROPERTIES_INVALID_EMPTY_KEY;
--- a/tests/test_properties.c	Tue Dec 09 19:05:35 2025 +0100
+++ b/tests/test_properties.c	Wed Dec 10 13:12:27 2025 +0100
@@ -41,6 +41,7 @@
         CX_TEST_ASSERT(prop.config.comment1 == '#');
         CX_TEST_ASSERT(prop.config.comment2 == 0);
         CX_TEST_ASSERT(prop.config.comment3 == 0);
+        CX_TEST_ASSERT(prop.config.continuation == '\\');
         CX_TEST_ASSERT(prop.input.space == NULL);
         CX_TEST_ASSERT(prop.buffer.space == NULL);
 
@@ -383,6 +384,108 @@
     free(long_value);
 }
 
+CX_TEST(test_properties_next_line_continuation) { // line continuation with all input supplied in a single fill
+    const char *str = 
+        "key1 = multiline \\\nvalue\n"
+        "key2 = normal\n"
+        "key3 = multiline \\\n  trim  \n"
+        "key4 = m1\\\nm2\\\n  m3\\\nm4  \n"
+        "key5 = no\\continuation\n";
+    
+    CxProperties prop;
+    cxPropertiesInitDefault(&prop);
+    
+    cxstring key;
+    cxstring value;
+    
+    CX_TEST_DO {
+        CX_TEST_ASSERT(0 == cxPropertiesFill(&prop, str));
+        // key1: the backslash and the newline after it do not appear in the value
+        CX_TEST_ASSERT(cxPropertiesNext(&prop, &key,  &value) == CX_PROPERTIES_NO_ERROR);
+        CX_TEST_ASSERT(!cx_strcmp(key, "key1"));
+        CX_TEST_ASSERT(!cx_strcmp(value, "multiline value"));
+        // key2: an ordinary single-line entry that follows a continued one
+        CX_TEST_ASSERT(cxPropertiesNext(&prop, &key,  &value) == CX_PROPERTIES_NO_ERROR);
+        CX_TEST_ASSERT(!cx_strcmp(key, "key2"));
+        CX_TEST_ASSERT(!cx_strcmp(value, "normal"));
+        // key3: whitespace at the start of the continued line and at the end of the value is dropped
+        CX_TEST_ASSERT(cxPropertiesNext(&prop, &key,  &value) == CX_PROPERTIES_NO_ERROR);
+        CX_TEST_ASSERT(!cx_strcmp(key, "key3"));
+        CX_TEST_ASSERT(!cx_strcmp(value, "multiline trim"));
+        // key4: three continuations within one value
+        CX_TEST_ASSERT(cxPropertiesNext(&prop, &key,  &value) == CX_PROPERTIES_NO_ERROR);
+        CX_TEST_ASSERT(!cx_strcmp(key, "key4"));
+        CX_TEST_ASSERT(!cx_strcmp(value, "m1m2m3m4"));
+        // key5: a backslash that is not directly followed by a newline stays in the value
+        CX_TEST_ASSERT(cxPropertiesNext(&prop, &key,  &value) == CX_PROPERTIES_NO_ERROR);
+        CX_TEST_ASSERT(!cx_strcmp(key, "key5"));
+        CX_TEST_ASSERT(!cx_strcmp(value, "no\\continuation"));
+    }
+    
+    cxPropertiesDestroy(&prop);
+}
+
+CX_TEST(test_properties_next_line_continuation_part) { // line continuation with the input arriving over several fills
+    CxProperties prop;
+    cxPropertiesInitDefault(&prop);
+    
+    cxstring key;
+    cxstring value;
+    CxPropertiesStatus result;
+    const char *str;
+    
+    CX_TEST_DO {
+        // key1 = continue continue ...line, delivered in five fills; first the key only
+        str = "key1 ";
+        CX_TEST_ASSERT(0 == cxPropertiesFill(&prop, str));
+        result = cxPropertiesNext(&prop, &key,  &value);
+        CX_TEST_ASSERT(result == CX_PROPERTIES_INCOMPLETE_DATA);
+        // delimiter and start of the value; the fragment ends with the continuation character
+        str = "= continue \\";
+        CX_TEST_ASSERT(0 == cxPropertiesFill(&prop, str));
+        result = cxPropertiesNext(&prop, &key,  &value);
+        CX_TEST_ASSERT(result == CX_PROPERTIES_INCOMPLETE_DATA);
+        // the newline belonging to that continuation, then another continued line
+        str = "\ncontinue \\\n";
+        CX_TEST_ASSERT(0 == cxPropertiesFill(&prop, str));
+        result = cxPropertiesNext(&prop, &key,  &value);
+        CX_TEST_ASSERT(result == CX_PROPERTIES_INCOMPLETE_DATA);
+        // value text without a newline
+        str = "...";
+        CX_TEST_ASSERT(0 == cxPropertiesFill(&prop, str));
+        result = cxPropertiesNext(&prop, &key,  &value);
+        CX_TEST_ASSERT(result == CX_PROPERTIES_INCOMPLETE_DATA);
+        // the rest of the value with its newline, plus a complete second entry
+        str = "line\nkey2 = value2\n";
+        CX_TEST_ASSERT(0 == cxPropertiesFill(&prop, str));
+        result = cxPropertiesNext(&prop, &key,  &value);
+        CX_TEST_ASSERT(result == CX_PROPERTIES_NO_ERROR);
+        CX_TEST_ASSERT(!cx_strcmp(key, "key1"));
+        CX_TEST_ASSERT(!cx_strcmp(value, "continue continue ...line"));
+        
+        // key2 = value2 (NOTE(review): the returned status is not asserted here)
+        result = cxPropertiesNext(&prop, &key,  &value);
+        CX_TEST_ASSERT(!cx_strcmp(key, "key2"));
+        CX_TEST_ASSERT(!cx_strcmp(value, "value2"));
+        
+        // key3 = continue-line, fed one byte per fill; incomplete until the final newline
+        str = "key3=\\\ncontinue-\\\n   line";
+        size_t len = strlen(str);
+        for(size_t i=0;i<len;i++) {
+            CX_TEST_ASSERT(0 == cxPropertiesFill(&prop, cx_strn(str+i, 1)));
+            result = cxPropertiesNext(&prop, &key,  &value);
+            CX_TEST_ASSERT(result == CX_PROPERTIES_INCOMPLETE_DATA);
+        }
+        CX_TEST_ASSERT(0 == cxPropertiesFill(&prop, "\n"));
+        result = cxPropertiesNext(&prop, &key,  &value);
+        CX_TEST_ASSERT(result == CX_PROPERTIES_NO_ERROR);
+        CX_TEST_ASSERT(!cx_strcmp(key, "key3"));
+        CX_TEST_ASSERT(!cx_strcmp(value, "continue-line"));
+    }
+    
+    cxPropertiesDestroy(&prop);
+}
+
 CX_TEST(test_properties_load) {
     CxTestingAllocator talloc;
     cx_testing_allocator_init(&talloc);
@@ -710,6 +813,8 @@
     cx_test_register(suite, test_properties_next_multi);
     cx_test_register(suite, test_properties_next_part);
     cx_test_register(suite, test_properties_next_long_lines);
+    cx_test_register(suite, test_properties_next_line_continuation);
+    cx_test_register(suite, test_properties_next_line_continuation_part);
     cx_test_register(suite, test_properties_load);
     cx_test_register(suite, test_properties_load_empty_file);
     cx_test_register(suite, test_properties_load_only_comments);

mercurial