# HG changeset patch # User Mike Becker # Date 1766660857 -3600 # Node ID cf19b7820ff06effe2e3da350465ee07f1882b4b # Parent 56f55f2f70c05f3860b501770e5e9b26265b57c6 simplify cx_strtok_next() by removing the _m() variant - relates to #792 diff -r 56f55f2f70c0 -r cf19b7820ff0 docs/Writerside/topics/string.h.md --- a/docs/Writerside/topics/string.h.md Thu Dec 25 11:48:25 2025 +0100 +++ b/docs/Writerside/topics/string.h.md Thu Dec 25 12:07:37 2025 +0100 @@ -268,13 +268,11 @@ void cx_strtok_delim(CxStrtokCtx *ctx, const cxstring *delim, size_t count); -bool cx_strtok_next(CxStrtokCtx *ctx, cxstring *token); - -bool cx_strtok_next_m(CxStrtokCtx *ctx, cxmutstr *token); +bool cx_strtok_next(CxStrtokCtx *ctx, UcxStr* token); ``` You can tokenize a string by creating a _tokenization_ context with `cx_strtok()`, -and calling `cx_strtok_next()` or `cx_strtok_next_m()` as long as they return `true`. +and calling `cx_strtok_next()` as long as it returns `true`. The tokenization context is initialized with the string `str` to tokenize, one delimiter `delim`, and a `limit` for the maximum number of tokens. @@ -283,10 +281,10 @@ You can add additional delimiters to the context by calling `cx_strtok_delim()`, and specifying an array of delimiters to use. -> Regardless of how the context was initialized, you can use either `cx_strtok_next()` -> or `cx_strtok_next_m()` to retrieve the tokens. However, keep in mind that modifying -> characters in a token returned by `cx_strtok_next_m()` has only defined behavior, when the -> underlying `str` is a `cxmutstr`. +> Regardless of how the context was initialized, you can use `cx_strtok_next()` +> with pointers to `cxstring` or `cxmutstr`. However, keep in mind that modifying +> characters in a `cxmutstr` has only defined behavior, when the +> underlying `str` is also a `cxmutstr` that was not initialized with constant memory. 
### Example diff -r 56f55f2f70c0 -r cf19b7820ff0 src/cx/string.h --- a/src/cx/string.h Thu Dec 25 11:48:25 2025 +0100 +++ b/src/cx/string.h Thu Dec 25 12:07:37 2025 +0100 @@ -1189,24 +1189,35 @@ * has been reached */ cx_attr_nonnull cx_attr_nodiscard cx_attr_access_w(2) -CX_EXPORT bool cx_strtok_next(CxStrtokCtx *ctx, cxstring *token); +CX_EXPORT bool cx_strtok_next_(CxStrtokCtx *ctx, cxstring *token); +#ifdef __cplusplus +} // extern "C" +CX_CPPDECL cx_strtok_next(CxStrtokCtx *ctx, cxstring *token) { + return cx_strtok_next_(ctx, token); +} +CX_CPPDECL cx_strtok_next(CxStrtokCtx *ctx, cxmutstr *token) { + // Note: this is actually UB - fixed with start_lifetime_as() in C++23 + // but it works on all supported platforms + return cx_strtok_next_(ctx, reinterpret_cast<cxstring*>(token)); +} +extern "C" { +#else // ! __cplusplus /** - * Returns the next token of a mutable string. + * Returns the next token. * * The token will point to the source string. * - * @attention - * If the context was not initialized over a mutable string, modifying - * the data of the returned token is undefined behavior. - * - * @param ctx the tokenization context - * @param token a pointer to memory where the next token shall be stored + * @param ctx (@c CxStrtokCtx*) the tokenization context + * @param token a pointer to either a @c cxstring or @c cxmutstr + * where the next token shall be stored * @return true if successful, false if the limit or the end of the string * has been reached */ -cx_attr_nonnull cx_attr_nodiscard cx_attr_access_w(2) -CX_EXPORT bool cx_strtok_next_m(CxStrtokCtx *ctx, cxmutstr *token); +#define cx_strtok_next(ctx, token) _Generic((token), \ + cxstring*: cx_strtok_next_, \ + cxmutstr*: cx_strtok_next_)(ctx, (cxstring*)token) +#endif /** * Defines an array of more delimiters for the specified tokenization context. 
diff -r 56f55f2f70c0 -r cf19b7820ff0 src/string.c --- a/src/string.c Thu Dec 25 11:48:25 2025 +0100 +++ b/src/string.c Thu Dec 25 12:07:37 2025 +0100 @@ -644,7 +644,7 @@ return ctx; } -bool cx_strtok_next( +bool cx_strtok_next_( CxStrtokCtx *ctx, cxstring *token ) { @@ -687,13 +687,6 @@ return true; } -bool cx_strtok_next_m( - CxStrtokCtx *ctx, - cxmutstr *token -) { - return cx_strtok_next(ctx, (cxstring *) token); -} - void cx_strtok_delim( CxStrtokCtx *ctx, const cxstring *delim, diff -r 56f55f2f70c0 -r cf19b7820ff0 tests/test_string.c --- a/tests/test_string.c Thu Dec 25 11:48:25 2025 +0100 +++ b/tests/test_string.c Thu Dec 25 12:07:37 2025 +0100 @@ -1025,7 +1025,7 @@ bool ret; cxmutstr tok; - ret = cx_strtok_next_m(&ctx, &tok); + ret = cx_strtok_next(&ctx, &tok); CX_TEST_ASSERT(ret); CX_TEST_ASSERT(0 == cx_strcmp(tok, "an")); CX_TEST_ASSERT(ctx.pos == 0); @@ -1034,7 +1034,7 @@ CX_TEST_ASSERT(ctx.found == 1); test_toupper(tok); - ret = cx_strtok_next_m(&ctx, &tok); + ret = cx_strtok_next(&ctx, &tok); CX_TEST_ASSERT(ret); CX_TEST_ASSERT(0 == cx_strcmp(tok, "arbitrarily")); CX_TEST_ASSERT(ctx.pos == 3); @@ -1043,7 +1043,7 @@ CX_TEST_ASSERT(ctx.found == 2); test_toupper(tok); - ret = cx_strtok_next_m(&ctx, &tok); + ret = cx_strtok_next(&ctx, &tok); CX_TEST_ASSERT(ret); CX_TEST_ASSERT(0 == cx_strcmp(tok, "")); CX_TEST_ASSERT(ctx.pos == 15); @@ -1052,7 +1052,7 @@ CX_TEST_ASSERT(ctx.found == 3); test_toupper(tok); - ret = cx_strtok_next_m(&ctx, &tok); + ret = cx_strtok_next(&ctx, &tok); CX_TEST_ASSERT(ret); CX_TEST_ASSERT(0 == cx_strcmp(tok, "separated")); CX_TEST_ASSERT(ctx.pos == 17); @@ -1061,7 +1061,7 @@ CX_TEST_ASSERT(ctx.found == 4); test_toupper(tok); - ret = cx_strtok_next_m(&ctx, &tok); + ret = cx_strtok_next(&ctx, &tok); CX_TEST_ASSERT(ret); CX_TEST_ASSERT(0 == cx_strcmp(tok, "string")); CX_TEST_ASSERT(ctx.pos == 27); @@ -1070,7 +1070,7 @@ CX_TEST_ASSERT(ctx.found == 5); test_toupper(tok); - ret = cx_strtok_next_m(&ctx, &tok); + ret = cx_strtok_next(&ctx, 
&tok); CX_TEST_ASSERT(!ret); CX_TEST_ASSERT(ctx.pos == 27); CX_TEST_ASSERT(ctx.next_pos == 33);