diff --git a/third_party/rapidyaml/README.md b/third_party/rapidyaml/README.md index 685580b37..274ebc36c 100644 --- a/third_party/rapidyaml/README.md +++ b/third_party/rapidyaml/README.md @@ -1,8 +1,8 @@ # Vendoring This uses the 'single header' release of Rapid YAML. -Download from: https://github.com/biojppm/rapidyaml/releases/tag/v0.5.0 +Download from: https://github.com/biojppm/rapidyaml/releases/tag/v0.7.2 -`rapidyaml-0.5.0.hpp` (renamed to `ryml_all.hpp`) +`rapidyaml-0.7.2.hpp` (renamed to `ryml_all.hpp`) rapidyaml.cpp instantiates the library as a single translation unit. diff --git a/third_party/rapidyaml/ryml_all.hpp b/third_party/rapidyaml/ryml_all.hpp index e4b327555..c611a0bcc 100644 --- a/third_party/rapidyaml/ryml_all.hpp +++ b/third_party/rapidyaml/ryml_all.hpp @@ -10,14 +10,25 @@ // This is an amalgamated single-header version of the library. // // INSTRUCTIONS: -// - Include at will in any header of your project -// - In one (and only one) of your project source files, -// #define RYML_SINGLE_HDR_DEFINE_NOW and then include this header. -// This will enable the function and class definitions in -// the header file. -// - To compile into a shared library, just define the -// preprocessor symbol RYML_SHARED . This will take -// care of symbol export/import. +// +// - Include at will in any header of your project. Because the +// amalgamated header file is large, to speed up compilation of +// your project, protect the include with its include guard +// `_RYML_SINGLE_HEADER_AMALGAMATED_HPP_`, ie like this: +// ``` +// #ifndef _RYML_SINGLE_HEADER_AMALGAMATED_HPP_ +// #include +// #endif +// ``` +// +// - In one (and only one) of your project source files, #define +// RYML_SINGLE_HDR_DEFINE_NOW and then include this header. This will enable +// the function and class definitions in the header file. +// +// - To compile into a shared library, define the preprocessor symbol +// RYML_SHARED before including the header. 
This will take care of +// symbol export/import. +// // @@ -350,6 +361,8 @@ C4_FOR_EACH(PRN_STRUCT_OFFSETS, a, b, c); # define C4_UNIX #elif defined(__arm__) || defined(__aarch64__) # define C4_ARM +#elif defined(__xtensa__) || defined(__XTENSA__) +# define C4_XTENSA #elif defined(SWIG) # define C4_SWIG #else @@ -387,100 +400,100 @@ C4_FOR_EACH(PRN_STRUCT_OFFSETS, a, b, c); // see http://code.qt.io/cgit/qt/qtbase.git/tree/src/corelib/global/qprocessordetection.h #ifdef __ORDER_LITTLE_ENDIAN__ - #define _C4EL __ORDER_LITTLE_ENDIAN__ +# define _C4EL __ORDER_LITTLE_ENDIAN__ #else - #define _C4EL 1234 +# define _C4EL 1234 #endif #ifdef __ORDER_BIG_ENDIAN__ - #define _C4EB __ORDER_BIG_ENDIAN__ +# define _C4EB __ORDER_BIG_ENDIAN__ #else - #define _C4EB 4321 +# define _C4EB 4321 #endif // mixed byte order (eg, PowerPC or ia64) #define _C4EM 1111 #if defined(__x86_64) || defined(__x86_64__) || defined(__amd64) || defined(_M_X64) - #define C4_CPU_X86_64 - #define C4_WORDSIZE 8 - #define C4_BYTE_ORDER _C4EL +# define C4_CPU_X86_64 +# define C4_WORDSIZE 8 +# define C4_BYTE_ORDER _C4EL #elif defined(__i386) || defined(__i386__) || defined(_M_IX86) - #define C4_CPU_X86 - #define C4_WORDSIZE 4 - #define C4_BYTE_ORDER _C4EL +# define C4_CPU_X86 +# define C4_WORDSIZE 4 +# define C4_BYTE_ORDER _C4EL #elif defined(__arm__) || defined(_M_ARM) \ || defined(__TARGET_ARCH_ARM) || defined(__aarch64__) || defined(_M_ARM64) - #if defined(__aarch64__) || defined(_M_ARM64) - #define C4_CPU_ARM64 - #define C4_CPU_ARMV8 - #define C4_WORDSIZE 8 - #else - #define C4_CPU_ARM - #define C4_WORDSIZE 4 - #if defined(__ARM_ARCH_8__) || defined(__ARM_ARCH_8A__) \ - || (defined(__ARCH_ARM) && __ARCH_ARM >= 8) - || (defined(__TARGET_ARCH_ARM) && __TARGET_ARCH_ARM >= 8) \ - #define C4_CPU_ARMV8 - #elif defined(__ARM_ARCH_7__) || defined(_ARM_ARCH_7) \ +# if defined(__aarch64__) || defined(_M_ARM64) +# define C4_CPU_ARM64 +# define C4_CPU_ARMV8 +# define C4_WORDSIZE 8 +# else +# define C4_CPU_ARM +# define 
C4_WORDSIZE 4 +# if defined(__ARM_ARCH_8__) || defined(__ARM_ARCH_8A__) \ + || (defined(__ARCH_ARM) && __ARCH_ARM >= 8) \ + || (defined(__TARGET_ARCH_ARM) && __TARGET_ARCH_ARM >= 8) +# define C4_CPU_ARMV8 +# elif defined(__ARM_ARCH_7__) || defined(_ARM_ARCH_7) \ || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) \ || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) \ || defined(__ARM_ARCH_7EM__) \ || (defined(__TARGET_ARCH_ARM) && __TARGET_ARCH_ARM >= 7) \ || (defined(_M_ARM) && _M_ARM >= 7) - #define C4_CPU_ARMV7 - #elif defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \ +# define C4_CPU_ARMV7 +# elif defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \ || defined(__ARM_ARCH_6T2__) || defined(__ARM_ARCH_6Z__) \ || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6ZK__) \ || defined(__ARM_ARCH_6M__) || defined(__ARM_ARCH_6KZ__) \ || (defined(__TARGET_ARCH_ARM) && __TARGET_ARCH_ARM >= 6) - #define C4_CPU_ARMV6 - #elif defined(__ARM_ARCH_5TEJ__) \ +# define C4_CPU_ARMV6 +# elif defined(__ARM_ARCH_5TEJ__) \ || defined(__ARM_ARCH_5TE__) \ || (defined(__TARGET_ARCH_ARM) && __TARGET_ARCH_ARM >= 5) - #define C4_CPU_ARMV5 - #elif defined(__ARM_ARCH_4T__) \ +# define C4_CPU_ARMV5 +# elif defined(__ARM_ARCH_4T__) \ || (defined(__TARGET_ARCH_ARM) && __TARGET_ARCH_ARM >= 4) - #define C4_CPU_ARMV4 - #else - #error "unknown CPU architecture: ARM" - #endif - #endif - #if defined(__ARMEL__) || defined(__LITTLE_ENDIAN__) || defined(__AARCH64EL__) \ +# define C4_CPU_ARMV4 +# else +# error "unknown CPU architecture: ARM" +# endif +# endif +# if defined(__ARMEL__) || defined(__LITTLE_ENDIAN__) || defined(__AARCH64EL__) \ || (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)) \ || defined(_MSC_VER) // winarm64 does not provide any of the above macros, // but advises little-endianess: // https://docs.microsoft.com/en-us/cpp/build/overview-of-arm-abi-conventions?view=msvc-170 // So if it is visual studio compiling, we'll assume little endian. 
- #define C4_BYTE_ORDER _C4EL - #elif defined(__ARMEB__) || defined(__BIG_ENDIAN__) || defined(__AARCH64EB__) \ +# define C4_BYTE_ORDER _C4EL +# elif defined(__ARMEB__) || defined(__BIG_ENDIAN__) || defined(__AARCH64EB__) \ || (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)) - #define C4_BYTE_ORDER _C4EB - #elif defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_PDP_ENDIAN__) - #define C4_BYTE_ORDER _C4EM - #else - #error "unknown endianness" - #endif +# define C4_BYTE_ORDER _C4EB +# elif defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_PDP_ENDIAN__) +# define C4_BYTE_ORDER _C4EM +# else +# error "unknown endianness" +# endif #elif defined(__ia64) || defined(__ia64__) || defined(_M_IA64) - #define C4_CPU_IA64 - #define C4_WORDSIZE 8 - #define C4_BYTE_ORDER _C4EM +# define C4_CPU_IA64 +# define C4_WORDSIZE 8 +# define C4_BYTE_ORDER _C4EM // itanium is bi-endian - check byte order below #elif defined(__ppc__) || defined(__ppc) || defined(__powerpc__) \ || defined(_ARCH_COM) || defined(_ARCH_PWR) || defined(_ARCH_PPC) \ || defined(_M_MPPC) || defined(_M_PPC) - #if defined(__ppc64__) || defined(__powerpc64__) || defined(__64BIT__) - #define C4_CPU_PPC64 - #define C4_WORDSIZE 8 - #else - #define C4_CPU_PPC - #define C4_WORDSIZE 4 - #endif - #define C4_BYTE_ORDER _C4EM +# if defined(__ppc64__) || defined(__powerpc64__) || defined(__64BIT__) +# define C4_CPU_PPC64 +# define C4_WORDSIZE 8 +# else +# define C4_CPU_PPC +# define C4_WORDSIZE 4 +# endif +# define C4_BYTE_ORDER _C4EM // ppc is bi-endian - check byte order below #elif defined(__s390x__) || defined(__zarch__) || defined(__SYSC_ZARCH_) @@ -488,25 +501,45 @@ C4_FOR_EACH(PRN_STRUCT_OFFSETS, a, b, c); # define C4_WORDSIZE 8 # define C4_BYTE_ORDER _C4EB +#elif defined(__xtensa__) || defined(__XTENSA__) +# define C4_CPU_XTENSA +# define C4_WORDSIZE 4 +// not sure about this... 
+# if defined(__XTENSA_EL__) || defined(__xtensa_el__) +# define C4_BYTE_ORDER _C4EL +# else +# define C4_BYTE_ORDER _C4EB +# endif + #elif defined(__riscv) - #if __riscv_xlen == 64 - #define C4_CPU_RISCV64 - #define C4_WORDSIZE 8 - #else - #define C4_CPU_RISCV32 - #define C4_WORDSIZE 4 - #endif - #define C4_BYTE_ORDER _C4EL +# if __riscv_xlen == 64 +# define C4_CPU_RISCV64 +# define C4_WORDSIZE 8 +# else +# define C4_CPU_RISCV32 +# define C4_WORDSIZE 4 +# endif +# define C4_BYTE_ORDER _C4EL #elif defined(__EMSCRIPTEN__) # define C4_BYTE_ORDER _C4EL # define C4_WORDSIZE 4 +#elif defined(__loongarch__) +# if defined(__loongarch64) +# define C4_CPU_LOONGARCH64 +# define C4_WORDSIZE 8 +# else +# define C4_CPU_LOONGARCH +# define C4_WORDSIZE 4 +# endif +# define C4_BYTE_ORDER _C4EL + #elif defined(SWIG) - #error "please define CPU architecture macros when compiling with swig" +# error "please define CPU architecture macros when compiling with swig" #else - #error "unknown CPU architecture" +# error "unknown CPU architecture" #endif #define C4_LITTLE_ENDIAN (C4_BYTE_ORDER == _C4EL) @@ -518,6 +551,101 @@ C4_FOR_EACH(PRN_STRUCT_OFFSETS, a, b, c); // (end https://github.com/biojppm/c4core/src/c4/cpu.hpp) +// (amalgamate) these includes are needed to work around +// conditional includes in the gcc4.8 shim +#include +#include +#include + + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/gcc-4.8.hpp +// https://github.com/biojppm/c4core/src/c4/gcc-4.8.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4_GCC_4_8_HPP_ +#define _C4_GCC_4_8_HPP_ + +#if __GNUC__ == 4 && __GNUC_MINOR__ >= 8 +/* STL polyfills for old GNU compilers */ + +_Pragma("GCC diagnostic ignored \"-Wshadow\"") +_Pragma("GCC diagnostic ignored 
\"-Wmissing-field-initializers\"") + +#if __cplusplus +//included above: +//#include +//included above: +//#include + +namespace std { + +template +struct is_trivially_copyable : public integral_constant::value && __has_trivial_destructor(_Tp) && + (__has_trivial_constructor(_Tp) || __has_trivial_copy(_Tp) || __has_trivial_assign(_Tp))> +{ }; + +template +using is_trivially_copy_constructible = has_trivial_copy_constructor<_Tp>; + +template +using is_trivially_default_constructible = has_trivial_default_constructor<_Tp>; + +template +using is_trivially_copy_assignable = has_trivial_copy_assign<_Tp>; + +/* not supported */ +template +struct is_trivially_move_constructible : false_type +{ }; + +/* not supported */ +template +struct is_trivially_move_assignable : false_type +{ }; + +inline void *align(size_t __align, size_t __size, void*& __ptr, size_t& __space) noexcept +{ + if (__space < __size) + return nullptr; + const auto __intptr = reinterpret_cast(__ptr); + const auto __aligned = (__intptr - 1u + __align) & -__align; + const auto __diff = __aligned - __intptr; + if (__diff > (__space - __size)) + return nullptr; + else + { + __space -= __diff; + return __ptr = reinterpret_cast(__aligned); + } +} + +#if __GNUC__ == 4 && __GNUC_MINOR__ == 8 +typedef long double max_align_t ; +#endif + +} +#else // __cplusplus + +//included above: +//#include +// see https://sourceware.org/bugzilla/show_bug.cgi?id=25399 (ubuntu gcc-4.8) +#define memset(s, c, count) __builtin_memset(s, c, count) + +#endif // __cplusplus + +#endif // __GNUC__ == 4 && __GNUC_MINOR__ >= 8 + +#endif // _C4_GCC_4_8_HPP_ + + +// (end https://github.com/biojppm/c4core/src/c4/gcc-4.8.hpp) + //******************************************************************************** @@ -559,7 +687,7 @@ C4_FOR_EACH(PRN_STRUCT_OFFSETS, a, b, c); /** @see http://sourceforge.net/p/predef/wiki/Compilers/ for a list of compiler identifier macros */ /** @see https://msdn.microsoft.com/en-us/library/b0084kay.aspx for VS2013 
predefined macros */ -#if defined(_MSC_VER)// && (defined(C4_WIN) || defined(C4_XBOX) || defined(C4_UE4)) +#if defined(_MSC_VER) && !defined(__clang__) # define C4_MSVC # define C4_MSVC_VERSION_2022 17 # define C4_MSVC_VERSION_2019 16 @@ -571,7 +699,7 @@ C4_FOR_EACH(PRN_STRUCT_OFFSETS, a, b, c); # define C4_MSVC_VERSION C4_MSVC_VERSION_2022 // visual studio 2022 # define C4_MSVC_2022 # elif _MSC_VER >= 1920 -# define C4_MSVC_VERSION C_4MSVC_VERSION_2019 // visual studio 2019 +# define C4_MSVC_VERSION C4_MSVC_VERSION_2019 // visual studio 2019 # define C4_MSVC_2019 # elif _MSC_VER >= 1910 # define C4_MSVC_VERSION C4_MSVC_VERSION_2017 // visual studio 2017 @@ -628,6 +756,9 @@ C4_FOR_EACH(PRN_STRUCT_OFFSETS, a, b, c); # define C4_CLANG_VERSION __apple_build_version__ # endif # elif defined(__GNUC__) +# ifdef __MINGW32__ +# define C4_MINGW +# endif # define C4_GCC # if defined(__GNUC_PATCHLEVEL__) # define C4_GCC_VERSION C4_VERSION_ENCODED(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) @@ -660,98 +791,6 @@ C4_FOR_EACH(PRN_STRUCT_OFFSETS, a, b, c); // (end https://github.com/biojppm/c4core/src/c4/compiler.hpp) -// these includes are needed to work around conditional -// includes in the gcc4.8 shim -#include -#include -#include - - - - -//******************************************************************************** -//-------------------------------------------------------------------------------- -// cmake/compat/c4/gcc-4.8.hpp -// https://github.com/biojppm/c4core/cmake/compat/c4/gcc-4.8.hpp -//-------------------------------------------------------------------------------- -//******************************************************************************** - -#ifndef _C4_COMPAT_GCC_4_8_HPP_ -#define _C4_COMPAT_GCC_4_8_HPP_ - -#if __GNUC__ == 4 && __GNUC_MINOR__ >= 8 -/* STL polyfills for old GNU compilers */ - -_Pragma("GCC diagnostic ignored \"-Wshadow\"") -_Pragma("GCC diagnostic ignored \"-Wmissing-field-initializers\"") - -#if __cplusplus -//included above: 
-//#include -//included above: -//#include - -namespace std { - -template -struct is_trivially_copyable : public integral_constant::value && __has_trivial_destructor(_Tp) && - (__has_trivial_constructor(_Tp) || __has_trivial_copy(_Tp) || __has_trivial_assign(_Tp))> -{ }; - -template -using is_trivially_copy_constructible = has_trivial_copy_constructor<_Tp>; - -template -using is_trivially_default_constructible = has_trivial_default_constructor<_Tp>; - -template -using is_trivially_copy_assignable = has_trivial_copy_assign<_Tp>; - -/* not supported */ -template -struct is_trivially_move_constructible : false_type -{ }; - -/* not supported */ -template -struct is_trivially_move_assignable : false_type -{ }; - -inline void *align(size_t __align, size_t __size, void*& __ptr, size_t& __space) noexcept -{ - if (__space < __size) - return nullptr; - const auto __intptr = reinterpret_cast(__ptr); - const auto __aligned = (__intptr - 1u + __align) & -__align; - const auto __diff = __aligned - __intptr; - if (__diff > (__space - __size)) - return nullptr; - else - { - __space -= __diff; - return __ptr = reinterpret_cast(__aligned); - } -} -typedef long double max_align_t ; - -} -#else // __cplusplus - -//included above: -//#include -// see https://sourceware.org/bugzilla/show_bug.cgi?id=25399 (ubuntu gcc-4.8) -#define memset(s, c, count) __builtin_memset(s, c, count) - -#endif // __cplusplus - -#endif // __GNUC__ == 4 && __GNUC_MINOR__ >= 8 - -#endif // _C4_COMPAT_GCC_4_8_HPP_ - - -// (end https://github.com/biojppm/c4core/cmake/compat/c4/gcc-4.8.hpp) - //******************************************************************************** @@ -787,8 +826,8 @@ typedef long double max_align_t ; /* Detect C++ standard. 
* @see http://stackoverflow.com/a/7132549/5875572 */ #ifndef C4_CPP -# ifdef _MSC_VER -# if _MSC_VER >= 1910 // >VS2015: VS2017, VS2019 +# if defined(_MSC_VER) && !defined(__clang__) +# if _MSC_VER >= 1910 // >VS2015: VS2017, VS2019, VS2022 # if (!defined(_MSVC_LANG)) # error _MSVC not defined # endif @@ -885,33 +924,27 @@ typedef long double max_align_t ; #endif /** lifted from this answer: http://stackoverflow.com/a/20170989/5875572 */ -#ifndef _MSC_VER -# if __cplusplus < 201103 +#if defined(_MSC_VER) && !defined(__clang__) +# if _MSC_VER < 1900 # define C4_CONSTEXPR11 # define C4_CONSTEXPR14 -//# define C4_NOEXCEPT -# elif __cplusplus == 201103 +# elif _MSC_VER < 2000 # define C4_CONSTEXPR11 constexpr # define C4_CONSTEXPR14 -//# define C4_NOEXCEPT noexcept # else # define C4_CONSTEXPR11 constexpr # define C4_CONSTEXPR14 constexpr -//# define C4_NOEXCEPT noexcept # endif -#else // _MSC_VER -# if _MSC_VER < 1900 +#else +# if __cplusplus < 201103 # define C4_CONSTEXPR11 # define C4_CONSTEXPR14 -//# define C4_NOEXCEPT -# elif _MSC_VER < 2000 +# elif __cplusplus == 201103 # define C4_CONSTEXPR11 constexpr # define C4_CONSTEXPR14 -//# define C4_NOEXCEPT noexcept # else # define C4_CONSTEXPR11 constexpr # define C4_CONSTEXPR14 constexpr -//# define C4_NOEXCEPT noexcept # endif #endif // _MSC_VER @@ -924,6 +957,42 @@ typedef long double max_align_t ; #define C4_INLINE_CONSTEXPR inline constexpr #endif +#if defined(_MSC_VER) && !defined(__clang__) +# if (defined(_CPPUNWIND) && (_CPPUNWIND == 1)) +# define C4_EXCEPTIONS +# endif +#else +# if defined(__EXCEPTIONS) || defined(__cpp_exceptions) +# define C4_EXCEPTIONS +# endif +#endif + +#ifdef C4_EXCEPTIONS +# define C4_IF_EXCEPTIONS_(exc_code, setjmp_code) exc_code +# define C4_IF_EXCEPTIONS(exc_code, setjmp_code) do { exc_code } while(0) +#else +# define C4_IF_EXCEPTIONS_(exc_code, setjmp_code) setjmp_code +# define C4_IF_EXCEPTIONS(exc_code, setjmp_code) do { setjmp_code } while(0) +#endif + +#if defined(_MSC_VER) && 
!defined(__clang__) +# if defined(_CPPRTTI) +# define C4_RTTI +# endif +#else +# if defined(__GXX_RTTI) +# define C4_RTTI +# endif +#endif + +#ifdef C4_RTTI +# define C4_IF_RTTI_(code_rtti, code_no_rtti) code_rtti +# define C4_IF_RTTI(code_rtti, code_no_rtti) do { code_rtti } while(0) +#else +# define C4_IF_RTTI_(code_rtti, code_no_rtti) code_no_rtti +# define C4_IF_RTTI(code_rtti, code_no_rtti) do { code_no_rtti } while(0) +#endif + //------------------------------------------------------------ @@ -942,7 +1011,7 @@ typedef long double max_align_t ; //------------------------------------------------------------ #ifndef C4_API -# if defined(_MSC_VER) +# if defined(_MSC_VER) && !defined(__clang__) # if defined(C4_EXPORT) # define C4_API __declspec(dllexport) # elif defined(C4_IMPORT) @@ -955,7 +1024,33 @@ typedef long double max_align_t ; # endif #endif -#ifndef _MSC_VER ///< @todo assuming gcc-like compiler. check it is actually so. +#if defined(_MSC_VER) && !defined(__clang__) +# define C4_RESTRICT __restrict +# define C4_RESTRICT_FN __declspec(restrict) +# define C4_NO_INLINE __declspec(noinline) +# define C4_ALWAYS_INLINE inline __forceinline +/** these are not available in VS AFAIK */ +# define C4_CONST +# define C4_PURE +# define C4_FLATTEN +# define C4_HOT /** @todo */ +# define C4_COLD /** @todo */ +# define C4_ASSUME(...) __assume(__VA_ARGS__) +# define C4_EXPECT(x, y) x /** @todo */ +# define C4_LIKELY(x) x +# define C4_UNLIKELY(x) x +# define C4_UNREACHABLE() _c4_msvc_unreachable() +# define C4_ATTR_FORMAT(...) /** */ +# define C4_NORETURN [[noreturn]] +# if _MSC_VER >= 1700 // VS2012 +# define C4_NODISCARD _Check_return_ +# else +# define C4_NODISCARD +# endif +[[noreturn]] __forceinline void _c4_msvc_unreachable() { __assume(false); } ///< https://stackoverflow.com/questions/60802864/emulating-gccs-builtin-unreachable-in-visual-studio +# define C4_UNREACHABLE_AFTER_ERR() /* */ +#else + ///< @todo assuming gcc-like compiler. check it is actually so. 
/** for function attributes in GCC, * @see https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#Common-Function-Attributes */ /** for __builtin functions in GCC, @@ -981,31 +1076,58 @@ typedef long double max_align_t ; # define C4_UNREACHABLE() __builtin_unreachable() # define C4_ATTR_FORMAT(...) //__attribute__((format (__VA_ARGS__))) ///< @see https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#Common-Function-Attributes # define C4_NORETURN __attribute__((noreturn)) -#else -# define C4_RESTRICT __restrict -# define C4_RESTRICT_FN __declspec(restrict) -# define C4_NO_INLINE __declspec(noinline) -# define C4_ALWAYS_INLINE inline __forceinline -/** these are not available in VS AFAIK */ -# define C4_CONST -# define C4_PURE -# define C4_FLATTEN -# define C4_HOT /** @todo */ -# define C4_COLD /** @todo */ -# define C4_EXPECT(x, y) x /** @todo */ -# define C4_LIKELY(x) x /** @todo */ -# define C4_UNLIKELY(x) x /** @todo */ -# define C4_UNREACHABLE() /** @todo */ -# define C4_ATTR_FORMAT(...) /** */ -# define C4_NORETURN /** @todo */ +# define C4_NODISCARD __attribute__((warn_unused_result)) +# define C4_UNREACHABLE_AFTER_ERR() C4_UNREACHABLE() +// C4_ASSUME +// see https://stackoverflow.com/questions/63493968/reproducing-clangs-builtin-assume-for-gcc +// preferred option: C++ standard attribute +# ifdef __has_cpp_attribute +# if __has_cpp_attribute(assume) >= 202207L +# define C4_ASSUME(...) [[assume(__VA_ARGS__)]] +# endif +# endif +// first fallback: compiler intrinsics/attributes for assumptions +# ifndef C4_ASSUME +# if defined(__clang__) +# define C4_ASSUME(...) __builtin_assume(__VA_ARGS__) +# elif defined(__GNUC__) +# if __GNUC__ >= 13 +# define C4_ASSUME(...) __attribute__((__assume__(__VA_ARGS__))) +# endif +# endif +# endif +// second fallback: possibly evaluating uses of unreachable() +// Set this to 1 if you want to allow assumptions to possibly evaluate. 
+# ifndef C4_ASSUME_ALLOW_EVAL +# define C4_ASSUME_ALLOW_EVAL 0 +# endif +# if !defined(C4_ASSUME) && (C4_ASSUME_ALLOW_EVAL) +# define C4_ASSUME(...) do { if (!bool(__VA_ARGS__)) C4_UNREACHABLE(); } while(0) /* local fix: block was closed with ')' instead of '}' */ +# endif +// last fallback: define macro as doing nothing +# ifndef C4_ASSUME +# define C4_ASSUME(...) +# endif +#endif + + +#if C4_CPP >= 14 +# define C4_DEPRECATED(msg) [[deprecated(msg)]] +#else +# if defined(_MSC_VER) +# define C4_DEPRECATED(msg) __declspec(deprecated(msg)) +# else // defined(__GNUC__) || defined(__clang__) +# define C4_DEPRECATED(msg) __attribute__((deprecated(msg))) +# endif #endif -#ifndef _MSC_VER + +#ifdef _MSC_VER # define C4_FUNC __FUNCTION__ -# define C4_PRETTY_FUNC __PRETTY_FUNCTION__ +# define C4_PRETTY_FUNC __FUNCSIG__ #else /// @todo assuming gcc-like compiler. check it is actually so. # define C4_FUNC __FUNCTION__ -# define C4_PRETTY_FUNC __FUNCSIG__ +# define C4_PRETTY_FUNC __PRETTY_FUNCTION__ #endif /** prevent compiler warnings about a specific var being unused */ @@ -1035,10 +1157,10 @@ void use_char_pointer(char const volatile*); /** @def C4_KEEP_EMPTY_LOOP prevent an empty loop from being optimized out. * @see http://stackoverflow.com/a/7084193/5875572 */ -#ifndef _MSC_VER -# define C4_KEEP_EMPTY_LOOP { asm(""); } -#else +#if defined(_MSC_VER) && !defined(__clang__) # define C4_KEEP_EMPTY_LOOP { char c; C4_DONT_OPTIMIZE(c); } +#else +# define C4_KEEP_EMPTY_LOOP { asm(""); } #endif /** @def C4_VA_LIST_REUSE_MUST_COPY @@ -1127,6 +1249,13 @@ using ssize_t = typename std::make_signed::type; // some tag types +#if !defined(__clang__) && defined(__GNUC__) +#pragma GCC diagnostic push +#if __GNUC__ >= 6 +#pragma GCC diagnostic ignored "-Wunused-const-variable" +#endif +#endif + /** a tag type for initializing the containers with variadic arguments a la * initializer_list, minus the initializer_list overload problems. 
*/ @@ -1144,6 +1273,10 @@ struct varargs_t {}; /** @see with_capacity_t */ constexpr const varargs_t varargs{}; +#if !defined(__clang__) && defined(__GNUC__) +#pragma GCC diagnostic pop +#endif + //-------------------------------------------------- @@ -2052,7 +2185,8 @@ struct ScopedErrorSettings /** source location */ struct srcloc; -C4CORE_EXPORT void handle_error(srcloc s, const char *fmt, ...); +// watchout: for VS the [[noreturn]] needs to come before other annotations like C4CORE_EXPORT +[[noreturn]] C4CORE_EXPORT void handle_error(srcloc s, const char *fmt, ...); C4CORE_EXPORT void handle_warning(srcloc s, const char *fmt, ...); @@ -2221,12 +2355,12 @@ struct srcloc // Common error conditions #define C4_NOT_IMPLEMENTED() C4_ERROR("NOT IMPLEMENTED") -#define C4_NOT_IMPLEMENTED_MSG(/*msg, */...) C4_ERROR("NOT IMPLEMENTED: " ## __VA_ARGS__) +#define C4_NOT_IMPLEMENTED_MSG(/*msg, */...) C4_ERROR("NOT IMPLEMENTED: " __VA_ARGS__) #define C4_NOT_IMPLEMENTED_IF(condition) do { if(C4_UNLIKELY(condition)) { C4_ERROR("NOT IMPLEMENTED"); } } while(0) -#define C4_NOT_IMPLEMENTED_IF_MSG(condition, /*msg, */...) do { if(C4_UNLIKELY(condition)) { C4_ERROR("NOT IMPLEMENTED: " ## __VA_ARGS__); } } while(0) +#define C4_NOT_IMPLEMENTED_IF_MSG(condition, /*msg, */...) do { if(C4_UNLIKELY(condition)) { C4_ERROR("NOT IMPLEMENTED: " __VA_ARGS__); } } while(0) #define C4_NEVER_REACH() do { C4_ERROR("never reach this point"); C4_UNREACHABLE(); } while(0) -#define C4_NEVER_REACH_MSG(/*msg, */...) do { C4_ERROR("never reach this point: " ## __VA_ARGS__); C4_UNREACHABLE(); } while(0) +#define C4_NEVER_REACH_MSG(/*msg, */...) 
do { C4_ERROR("never reach this point: " __VA_ARGS__); C4_UNREACHABLE(); } while(0) @@ -2234,19 +2368,17 @@ struct srcloc // helpers for warning suppression // idea adapted from https://github.com/onqtam/doctest/ +// TODO: add C4_MESSAGE() https://stackoverflow.com/questions/18252351/custom-preprocessor-macro-for-a-conditional-pragma-message-xxx?rq=1 + #ifdef C4_MSVC #define C4_SUPPRESS_WARNING_MSVC_PUSH __pragma(warning(push)) #define C4_SUPPRESS_WARNING_MSVC(w) __pragma(warning(disable : w)) #define C4_SUPPRESS_WARNING_MSVC_POP __pragma(warning(pop)) -#define C4_SUPPRESS_WARNING_MSVC_WITH_PUSH(w) \ - C4_SUPPRESS_WARNING_MSVC_PUSH \ - C4_SUPPRESS_WARNING_MSVC(w) #else // C4_MSVC #define C4_SUPPRESS_WARNING_MSVC_PUSH #define C4_SUPPRESS_WARNING_MSVC(w) #define C4_SUPPRESS_WARNING_MSVC_POP -#define C4_SUPPRESS_WARNING_MSVC_WITH_PUSH(w) #endif // C4_MSVC @@ -2255,14 +2387,10 @@ struct srcloc #define C4_SUPPRESS_WARNING_CLANG_PUSH _Pragma("clang diagnostic push") #define C4_SUPPRESS_WARNING_CLANG(w) C4_PRAGMA_TO_STR(clang diagnostic ignored w) #define C4_SUPPRESS_WARNING_CLANG_POP _Pragma("clang diagnostic pop") -#define C4_SUPPRESS_WARNING_CLANG_WITH_PUSH(w) \ - C4_SUPPRESS_WARNING_CLANG_PUSH \ - C4_SUPPRESS_WARNING_CLANG(w) #else // C4_CLANG #define C4_SUPPRESS_WARNING_CLANG_PUSH #define C4_SUPPRESS_WARNING_CLANG(w) #define C4_SUPPRESS_WARNING_CLANG_POP -#define C4_SUPPRESS_WARNING_CLANG_WITH_PUSH(w) #endif // C4_CLANG @@ -2271,17 +2399,26 @@ struct srcloc #define C4_SUPPRESS_WARNING_GCC_PUSH _Pragma("GCC diagnostic push") #define C4_SUPPRESS_WARNING_GCC(w) C4_PRAGMA_TO_STR(GCC diagnostic ignored w) #define C4_SUPPRESS_WARNING_GCC_POP _Pragma("GCC diagnostic pop") -#define C4_SUPPRESS_WARNING_GCC_WITH_PUSH(w) \ - C4_SUPPRESS_WARNING_GCC_PUSH \ - C4_SUPPRESS_WARNING_GCC(w) #else // C4_GCC #define C4_SUPPRESS_WARNING_GCC_PUSH #define C4_SUPPRESS_WARNING_GCC(w) #define C4_SUPPRESS_WARNING_GCC_POP -#define C4_SUPPRESS_WARNING_GCC_WITH_PUSH(w) #endif // C4_GCC +#define 
C4_SUPPRESS_WARNING_MSVC_WITH_PUSH(w) \ + C4_SUPPRESS_WARNING_MSVC_PUSH \ + C4_SUPPRESS_WARNING_MSVC(w) + +#define C4_SUPPRESS_WARNING_CLANG_WITH_PUSH(w) \ + C4_SUPPRESS_WARNING_CLANG_PUSH \ + C4_SUPPRESS_WARNING_CLANG(w) + +#define C4_SUPPRESS_WARNING_GCC_WITH_PUSH(w) \ + C4_SUPPRESS_WARNING_GCC_PUSH \ + C4_SUPPRESS_WARNING_GCC(w) + + #define C4_SUPPRESS_WARNING_GCC_CLANG_PUSH \ C4_SUPPRESS_WARNING_GCC_PUSH \ C4_SUPPRESS_WARNING_CLANG_PUSH @@ -2372,6 +2509,8 @@ struct srcloc namespace c4 { +C4_SUPPRESS_WARNING_GCC_CLANG_WITH_PUSH("-Wold-style-cast") + /** set the given memory to zero */ C4_ALWAYS_INLINE void mem_zero(void* mem, size_t num_bytes) { @@ -3114,6 +3253,8 @@ struct tight_pair : public Second template using tight_pair = detail::tight_pair()>; +C4_SUPPRESS_WARNING_GCC_CLANG_POP + } // namespace c4 #endif /* _C4_MEMORY_UTIL_HPP_ */ @@ -3765,6 +3906,8 @@ struct ScopedMemoryResourceCounts namespace c4 { +C4_SUPPRESS_WARNING_GCC_CLANG_WITH_PUSH("-Wold-style-cast") + /** default-construct an object, trivial version */ template C4_ALWAYS_INLINE typename std::enable_if::value, void>::type construct(U *ptr) noexcept @@ -4206,9 +4349,9 @@ destroy_room(U *dst, U const* src, I n, I room, I pos) } } -} // namespace c4 +C4_SUPPRESS_WARNING_GCC_CLANG_POP -#undef _C4REQUIRE +} // namespace c4 #endif /* _C4_CTOR_DTOR_HPP_ */ @@ -4263,6 +4406,8 @@ destroy_room(U *dst, U const* src, I n, I room, I pos) namespace c4 { +C4_SUPPRESS_WARNING_GCC_CLANG_WITH_PUSH("-Wold-style-cast") + namespace detail { template inline size_t size_for (size_t num_objs) noexcept { return num_objs * sizeof(T); } template< > inline size_t size_for(size_t num_objs) noexcept { return num_objs; } @@ -4639,6 +4784,8 @@ template using small_allocato /** @ingroup allocators */ template using small_allocator_mr = SmallAllocator; +C4_SUPPRESS_WARNING_GCC_CLANG_POP + } // namespace c4 #endif /* _C4_ALLOCATOR_HPP_ */ @@ -4910,6 +5057,8 @@ C4_CONSTEXPR14 inline size_t hash_bytes(const char (&str)[N]) noexcept 
namespace c4 { +C4_SUPPRESS_WARNING_GCC_CLANG_WITH_PUSH("-Wold-style-cast") + /** @todo this would be so much easier with calls to numeric_limits::max()... */ template struct is_narrower_size : std::conditional @@ -4951,7 +5100,7 @@ szconv(SizeIn sz) noexcept template C4_ALWAYS_INLINE typename std::enable_if::value, SizeOut>::type -szconv(SizeIn sz) C4_NOEXCEPT_X +szconv(SizeIn sz) { C4_XASSERT(sz >= 0); C4_XASSERT_MSG((SizeIn)sz <= (SizeIn)std::numeric_limits::max(), "size conversion overflow: in=%zu", (size_t)sz); @@ -4959,6 +5108,8 @@ szconv(SizeIn sz) C4_NOEXCEPT_X return szo; } +C4_SUPPRESS_WARNING_GCC_CLANG_POP + } // namespace c4 #endif /* _C4_SZCONV_HPP_ */ @@ -4998,32 +5149,49 @@ szconv(SizeIn sz) C4_NOEXCEPT_X namespace c4 { +template +struct blob_; + +namespace detail { +template struct is_blob_type : std::integral_constant {}; +template struct is_blob_type> : std::integral_constant {}; +template struct is_blob_value_type : std::integral_constant::value || std::is_trivially_copyable::value)> {}; +} // namespace + template struct blob_ { + static_assert(std::is_same::value || std::is_same::value, "must be either byte or cbyte"); + static_assert(sizeof(T) == 1u, "must be either byte or cbyte"); + +public: + T * buf; size_t len; - C4_ALWAYS_INLINE blob_() noexcept : buf(), len() {} +public: + C4_ALWAYS_INLINE blob_() noexcept = default; C4_ALWAYS_INLINE blob_(blob_ const& that) noexcept = default; C4_ALWAYS_INLINE blob_(blob_ && that) noexcept = default; C4_ALWAYS_INLINE blob_& operator=(blob_ && that) noexcept = default; C4_ALWAYS_INLINE blob_& operator=(blob_ const& that) noexcept = default; - // need to sfinae out copy constructors! (why? isn't the above sufficient?) - #define _C4_REQUIRE_NOT_SAME class=typename std::enable_if<( ! std::is_same::value) && ( ! 
std::is_pointer::value), T>::type - template C4_ALWAYS_INLINE blob_(U &var) noexcept : buf(reinterpret_cast(&var)), len(sizeof(U)) {} - template C4_ALWAYS_INLINE blob_& operator= (U &var) noexcept { buf = reinterpret_cast(&var); len = sizeof(U); return *this; } - #undef _C4_REQUIRE_NOT_SAME - - template C4_ALWAYS_INLINE blob_(U (&arr)[N]) noexcept : buf(reinterpret_cast(arr)), len(sizeof(U) * N) {} - template C4_ALWAYS_INLINE blob_& operator= (U (&arr)[N]) noexcept { buf = reinterpret_cast(arr); len = sizeof(U) * N; return *this; } + template::value && std::is_same::type, T>::value, U>::type> C4_ALWAYS_INLINE blob_(blob_ const& that) noexcept : buf(that.buf), len(that.len) {} + template::value && std::is_same::type, T>::value, U>::type> C4_ALWAYS_INLINE blob_(blob_ && that) noexcept : buf(that.buf), len(that.len) {} + template::value && std::is_same::type, T>::value, U>::type> C4_ALWAYS_INLINE blob_& operator=(blob_ && that) noexcept { buf = that.buf; len = that.len; } + template::value && std::is_same::type, T>::value, U>::type> C4_ALWAYS_INLINE blob_& operator=(blob_ const& that) noexcept { buf = that.buf; len = that.len; } - template - C4_ALWAYS_INLINE blob_(U *ptr, size_t n) noexcept : buf(reinterpret_cast(ptr)), len(sizeof(U) * n) { C4_ASSERT(is_aligned(ptr)); } C4_ALWAYS_INLINE blob_(void *ptr, size_t n) noexcept : buf(reinterpret_cast(ptr)), len(n) {} C4_ALWAYS_INLINE blob_(void const *ptr, size_t n) noexcept : buf(reinterpret_cast(ptr)), len(n) {} + + #define _C4_REQUIRE_BLOBTYPE(ty) class=typename std::enable_if<((!detail::is_blob_type::value) && (detail::is_blob_value_type::value)), T>::type + template C4_ALWAYS_INLINE blob_(U &var) noexcept : buf(reinterpret_cast(&var)), len(sizeof(U)) {} + template C4_ALWAYS_INLINE blob_(U *ptr, size_t n) noexcept : buf(reinterpret_cast(ptr)), len(sizeof(U) * n) { C4_ASSERT(is_aligned(ptr)); } + template C4_ALWAYS_INLINE blob_& operator= (U &var) noexcept { buf = reinterpret_cast(&var); len = sizeof(U); return *this; } 
+ template C4_ALWAYS_INLINE blob_(U (&arr)[N]) noexcept : buf(reinterpret_cast(arr)), len(sizeof(U) * N) {} + template C4_ALWAYS_INLINE blob_& operator= (U (&arr)[N]) noexcept { buf = reinterpret_cast(arr); len = sizeof(U) * N; return *this; } + #undef _C4_REQUIRE_BLOBTYPE }; /** an immutable binary blob */ @@ -5121,22 +5289,26 @@ using substr = C4CORE_EXPORT basic_substring; #ifdef __clang__ # pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wold-style-cast" #elif defined(__GNUC__) # pragma GCC diagnostic push # pragma GCC diagnostic ignored "-Wtype-limits" // disable warnings on size_t>=0, used heavily in assertions below. These assertions are a preparation step for providing the index type as a template parameter. # pragma GCC diagnostic ignored "-Wuseless-cast" +# pragma GCC diagnostic ignored "-Wold-style-cast" #endif namespace c4 { +/** @defgroup doc_substr Substring: read/write string views + * @{ */ //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- +/** @cond dev */ namespace detail { - template static inline void _do_reverse(C *C4_RESTRICT first, C *C4_RESTRICT last) { @@ -5147,36 +5319,33 @@ static inline void _do_reverse(C *C4_RESTRICT first, C *C4_RESTRICT last) *first++ = tmp; } } - } // namespace detail - +/** @endcond */ //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- +/** @cond dev */ // utility macros to deuglify SFINAE code; undefined after the class. // https://stackoverflow.com/questions/43051882/how-to-disable-a-class-member-funrtion-for-certain-template-types #define C4_REQUIRE_RW(ret_type) \ template \ typename std::enable_if< ! 
std::is_const::value, ret_type>::type -// non-const-to-const -#define C4_NC2C(ty) \ - typename std::enable_if::value && ( ! std::is_const::value), ty>::type +/** @endcond */ /** a non-owning string-view, consisting of a character pointer * and a length. * * @note The pointer is explicitly restricted. - * @note Because of a C++ limitation, there cannot coexist overloads for - * constructing from a char[N] and a char*; the latter will always be chosen - * by the compiler. To construct an object of this type, call to_substr() or - * to_csubstr(). For a more detailed explanation on why the overloads cannot - * coexist, see http://cplusplus.bordoon.com/specializeForCharacterArrays.html * - * @see to_substr() - * @see to_csubstr() + * @see a [quickstart + * sample](https://rapidyaml.readthedocs.io/latest/doxygen/group__doc__quickstart.html#ga43e253da0692c13967019446809c1113) + * in rapidyaml's documentation. + * + * @see @ref substr and @ref to_substr() + * @see @ref csubstr and @ref to_csubstr() */ template struct C4CORE_EXPORT basic_substring @@ -5208,7 +5377,11 @@ struct C4CORE_EXPORT basic_substring enum : size_t { npos = (size_t)-1, NONE = (size_t)-1 }; /// convert automatically to substring of const C - operator ro_substr () const { ro_substr s(str, len); return s; } + template + C4_ALWAYS_INLINE operator typename std::enable_if::value, ro_substr const&>::type () const noexcept + { + return *(ro_substr const*)this; // don't call the str+len ctor because it does a check + } /** @} */ @@ -5217,15 +5390,17 @@ struct C4CORE_EXPORT basic_substring /** @name Default construction and assignment */ /** @{ */ - constexpr basic_substring() : str(nullptr), len(0) {} + C4_ALWAYS_INLINE constexpr basic_substring() noexcept : str(), len() {} + + C4_ALWAYS_INLINE basic_substring(basic_substring const&) noexcept = default; + C4_ALWAYS_INLINE basic_substring(basic_substring &&) noexcept = default; + C4_ALWAYS_INLINE basic_substring(std::nullptr_t) noexcept : str(nullptr), len(0) {} 
- constexpr basic_substring(basic_substring const&) = default; - constexpr basic_substring(basic_substring &&) = default; - constexpr basic_substring(std::nullptr_t) : str(nullptr), len(0) {} + C4_ALWAYS_INLINE basic_substring& operator= (basic_substring const&) noexcept = default; + C4_ALWAYS_INLINE basic_substring& operator= (basic_substring &&) noexcept = default; + C4_ALWAYS_INLINE basic_substring& operator= (std::nullptr_t) noexcept { str = nullptr; len = 0; return *this; } - basic_substring& operator= (basic_substring const&) = default; - basic_substring& operator= (basic_substring &&) = default; - basic_substring& operator= (std::nullptr_t) { str = nullptr; len = 0; return *this; } + C4_ALWAYS_INLINE void clear() noexcept { str = nullptr; len = 0; } /** @} */ @@ -5234,62 +5409,60 @@ struct C4CORE_EXPORT basic_substring /** @name Construction and assignment from characters with the same type */ /** @{ */ - //basic_substring(C *s_) : str(s_), len(s_ ? strlen(s_) : 0) {} - /** the overload for receiving a single C* pointer will always - * hide the array[N] overload. So it is disabled. If you want to - * construct a substr from a single pointer containing a C-style string, - * you can call c4::to_substr()/c4::to_csubstr(). - * @see c4::to_substr() - * @see c4::to_csubstr() */ + /** Construct from an array. + * @warning the input string need not be zero terminated, but the + * length is taken as if the string was zero terminated */ template - constexpr basic_substring(C (&s_)[N]) noexcept : str(s_), len(N-1) {} - basic_substring(C *s_, size_t len_) : str(s_), len(len_) { C4_ASSERT(str || !len_); } - basic_substring(C *beg_, C *end_) : str(beg_), len(static_cast(end_ - beg_)) { C4_ASSERT(end_ >= beg_); } - - //basic_substring& operator= (C *s_) { this->assign(s_); return *this; } + C4_ALWAYS_INLINE constexpr basic_substring(C (&s_)[N]) noexcept : str(s_), len(N-1) {} + /** Construct from a pointer and length. 
+ * @warning the input string need not be zero terminated. */ + C4_ALWAYS_INLINE basic_substring(C *s_, size_t len_) noexcept : str(s_), len(len_) { C4_ASSERT(str || !len_); } + /** Construct from two pointers. + * @warning the end pointer MUST BE larger than or equal to the begin pointer + * @warning the input string need not be zero terminated */ + C4_ALWAYS_INLINE basic_substring(C *beg_, C *end_) noexcept : str(beg_), len(static_cast(end_ - beg_)) { C4_ASSERT(end_ >= beg_); } + /** Construct from a C-string (zero-terminated string) + * @warning the input string MUST BE zero terminated. + * @warning will call strlen() + * @note this overload uses SFINAE to prevent it from overriding the array ctor + * @see For a more detailed explanation on why the plain overloads cannot + * coexist, see http://cplusplus.bordoon.com/specializeForCharacterArrays.html */ + template::value || std::is_same::value, int>::type=0> + C4_ALWAYS_INLINE basic_substring(U s_) noexcept : str(s_), len(s_ ? strlen(s_) : 0) {} + + /** Assign from an array. + * @warning the input string need not be zero terminated, but the + * length is taken as if the string was zero terminated */ template - basic_substring& operator= (C (&s_)[N]) { this->assign(s_); return *this; } - - //void assign(C *s_) { str = (s_); len = (s_ ? strlen(s_) : 0); } - /** the overload for receiving a single C* pointer will always - * hide the array[N] overload. So it is disabled. If you want to - * construct a substr from a single pointer containing a C-style string, - * you can call c4::to_substr()/c4::to_csubstr(). - * @see c4::to_substr() - * @see c4::to_csubstr() */ + C4_ALWAYS_INLINE void assign(C (&s_)[N]) noexcept { str = (s_); len = (N-1); } + /** Assign from a pointer and length. + * @warning the input string need not be zero terminated. */ + C4_ALWAYS_INLINE void assign(C *s_, size_t len_) noexcept { str = s_; len = len_; C4_ASSERT(str || !len_); } + /** Assign from two pointers. 
+ * @warning the end pointer MUST BE larger than or equal to the begin pointer + * @warning the input string need not be zero terminated. */ + C4_ALWAYS_INLINE void assign(C *beg_, C *end_) noexcept { C4_ASSERT(end_ >= beg_); str = (beg_); len = static_cast(end_ - beg_); } + /** Assign from a C-string (zero-terminated string) + * @warning the input string must be zero terminated. + * @warning will call strlen() + * @note this overload uses SFINAE to prevent it from overriding the array ctor + * @see For a more detailed explanation on why the plain overloads cannot + * coexist, see http://cplusplus.bordoon.com/specializeForCharacterArrays.html */ + template::value || std::is_same::value, int>::type=0> + C4_ALWAYS_INLINE void assign(U s_) noexcept { str = (s_); len = (s_ ? strlen(s_) : 0); } + + /** Assign from an array. + * @warning the input string need not be zero terminated. */ template - void assign(C (&s_)[N]) { str = (s_); len = (N-1); } - void assign(C *s_, size_t len_) { str = s_; len = len_; C4_ASSERT(str || !len_); } - void assign(C *beg_, C *end_) { C4_ASSERT(end_ >= beg_); str = (beg_); len = (end_ - beg_); } - - void clear() { str = nullptr; len = 0; } - - /** @} */ - -public: - - /** @name Construction from non-const characters */ - /** @{ */ - - // when the char type is const, allow construction and assignment from non-const chars - - /** only available when the char type is const */ - template explicit basic_substring(C4_NC2C(U) (&s_)[N]) { str = s_; len = N-1; } - /** only available when the char type is const */ - template< class U=NCC_> basic_substring(C4_NC2C(U) *s_, size_t len_) { str = s_; len = len_; } - /** only available when the char type is const */ - template< class U=NCC_> basic_substring(C4_NC2C(U) *beg_, C4_NC2C(U) *end_) { C4_ASSERT(end_ >= beg_); str = beg_; len = end_ - beg_; } - - /** only available when the char type is const */ - template void assign(C4_NC2C(U) (&s_)[N]) { str = s_; len = N-1; } - /** only available when the char 
type is const */ - template< class U=NCC_> void assign(C4_NC2C(U) *s_, size_t len_) { str = s_; len = len_; } - /** only available when the char type is const */ - template< class U=NCC_> void assign(C4_NC2C(U) *beg_, C4_NC2C(U) *end_) { C4_ASSERT(end_ >= beg_); str = beg_; len = end_ - beg_; } - - /** only available when the char type is const */ - template - basic_substring& operator=(C4_NC2C(U) (&s_)[N]) { str = s_; len = N-1; return *this; } + C4_ALWAYS_INLINE basic_substring& operator= (C (&s_)[N]) noexcept { str = (s_); len = (N-1); return *this; } + /** Assign from a C-string (zero-terminated string) + * @warning the input string MUST BE zero terminated. + * @warning will call strlen() + * @note this overload uses SFINAE to prevent it from overriding the array ctor + * @see For a more detailed explanation on why the plain overloads cannot + * coexist, see http://cplusplus.bordoon.com/specializeForCharacterArrays.html */ + template::value || std::is_same::value, int>::type=0> + C4_ALWAYS_INLINE basic_substring& operator= (U s_) noexcept { str = s_; len = s_ ? strlen(s_) : 0; return *this; } /** @} */ @@ -5455,7 +5628,7 @@ struct C4CORE_EXPORT basic_substring return basic_substring(str, num != npos ? 
num : len); } - /** return the last @num elements: [len-num,len[*/ + /** return the last @p num elements: [len-num,len[*/ C4_ALWAYS_INLINE C4_PURE basic_substring last(size_t num) const noexcept { C4_ASSERT(num <= len || num == npos); @@ -5945,7 +6118,17 @@ struct C4CORE_EXPORT basic_substring public: - size_t first_not_of(const C c, size_t start=0) const + size_t first_not_of(const C c) const + { + for(size_t i = 0; i < len; ++i) + { + if(str[i] != c) + return i; + } + return npos; + } + + size_t first_not_of(const C c, size_t start) const { C4_ASSERT((start >= 0 && start <= len) || (start == len && len == 0)); for(size_t i = start; i < len; ++i) @@ -5956,7 +6139,17 @@ struct C4CORE_EXPORT basic_substring return npos; } - size_t last_not_of(const C c, size_t start=npos) const + size_t last_not_of(const C c) const + { + for(size_t i = len-1; i != size_t(-1); --i) + { + if(str[i] != c) + return i; + } + return npos; + } + + size_t last_not_of(const C c, size_t start) const { C4_ASSERT(start == npos || (start >= 0 && start <= len)); if(start == npos) @@ -5969,7 +6162,28 @@ struct C4CORE_EXPORT basic_substring return npos; } - size_t first_not_of(ro_substr chars, size_t start=0) const + size_t first_not_of(ro_substr chars) const + { + for(size_t i = 0; i < len; ++i) + { + bool gotit = true; + for(size_t j = 0; j < chars.len; ++j) + { + if(str[i] == chars.str[j]) + { + gotit = false; + break; + } + } + if(gotit) + { + return i; + } + } + return npos; + } + + size_t first_not_of(ro_substr chars, size_t start) const { C4_ASSERT((start >= 0 && start <= len) || (start == len && len == 0)); for(size_t i = start; i < len; ++i) @@ -5991,7 +6205,28 @@ struct C4CORE_EXPORT basic_substring return npos; } - size_t last_not_of(ro_substr chars, size_t start=npos) const + size_t last_not_of(ro_substr chars) const + { + for(size_t i = len-1; i != size_t(-1); --i) + { + bool gotit = true; + for(size_t j = 0; j < chars.len; ++j) + { + if(str[i] == chars.str[j]) + { + gotit = false; + 
break; + } + } + if(gotit) + { + return i; + } + } + return npos; + } + + size_t last_not_of(ro_substr chars, size_t start) const { C4_ASSERT(start == npos || (start >= 0 && start <= len)); if(start == npos) @@ -6174,7 +6409,7 @@ struct C4CORE_EXPORT basic_substring return ne; if(ne.str[0] == '-') return first(0); - size_t skip_start = (ne.str[0] == '+') ? 1 : 0; + size_t skip_start = size_t(ne.str[0] == '+'); return ne._first_integral_span(skip_start); } @@ -6184,7 +6419,7 @@ struct C4CORE_EXPORT basic_substring basic_substring ne = first_non_empty_span(); if(ne.empty()) return ne; - size_t skip_start = (ne.str[0] == '+' || ne.str[0] == '-') ? 1 : 0; + size_t skip_start = size_t(ne.str[0] == '+' || ne.str[0] == '-'); return ne._first_integral_span(skip_start); } @@ -6259,7 +6494,7 @@ struct C4CORE_EXPORT basic_substring basic_substring ne = first_non_empty_span(); if(ne.empty()) return ne; - size_t skip_start = (ne.str[0] == '+' || ne.str[0] == '-'); + const size_t skip_start = (ne.str[0] == '+' || ne.str[0] == '-'); C4_ASSERT(skip_start == 0 || skip_start == 1); // if we have at least three digits after the leading sign, it // can be decimal, or hex, or bin or oct. Ex: @@ -6399,14 +6634,11 @@ struct C4CORE_EXPORT basic_substring power_part_dec: C4_ASSERT(pos > 0); C4_ASSERT(str[pos - 1] == 'e' || str[pos - 1] == 'E'); - // either a + or a - is expected here, followed by more chars. - // also, using (pos+1) in this check will cause an early - // return when no more chars follow the sign. - if(len <= (pos+1) || ((!intchars) && (!fracchars))) + // either digits, or +, or - are expected here, followed by more digits. + if((len == pos) || ((!intchars) && (!fracchars))) return first(0); - ++pos; // this was the sign. - // ... 
so the (pos+1) ensures that we enter the loop and - // hence that there exist chars in the power part + if(str[pos] == '-' || str[pos] == '+') + ++pos; // skip the sign powchars = false; for( ; pos < len; ++pos) { @@ -6418,7 +6650,7 @@ struct C4CORE_EXPORT basic_substring else return first(0); } - return *this; + return powchars ? *this : first(0); } // this function is declared inside the class to avoid a VS error with __declspec(dllimport) @@ -6706,7 +6938,7 @@ struct C4CORE_EXPORT basic_substring { if(C4_LIKELY(*start_pos < len)) { - for(size_t i = *start_pos, e = len; i < e; i++) + for(size_t i = *start_pos; i < len; i++) { if(str[i] == sep) { @@ -6722,13 +6954,13 @@ struct C4CORE_EXPORT basic_substring else { bool valid = len > 0 && (*start_pos == len); - if(valid && !empty() && str[len-1] == sep) + if(valid && str && str[len-1] == sep) { - out->assign(str + len, (size_t)0); // the cast is needed to prevent overload ambiguity + out->assign(str + len, size_t(0)); // the cast is needed to prevent overload ambiguity } else { - out->assign(str + len + 1, (size_t)0); // the cast is needed to prevent overload ambiguity + out->assign(str + len + 1, size_t(0)); // the cast is needed to prevent overload ambiguity } *start_pos = len + 1; return valid; @@ -7160,7 +7392,7 @@ struct C4CORE_EXPORT basic_substring } /** replace @p pattern with @p repl, and write the result into - * @dst. pattern and repl don't need equal sizes. + * @p dst. pattern and repl don't need equal sizes. * * @return the required size for dst. 
No overflow occurs if * dst.len is smaller than the required size; this can be used to @@ -7212,102 +7444,83 @@ struct C4CORE_EXPORT basic_substring #undef C4_REQUIRE_RW -#undef C4_REQUIRE_RO -#undef C4_NC2C //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- -/** Because of a C++ limitation, substr cannot provide simultaneous - * overloads for constructing from a char[N] and a char*; the latter - * will always be chosen by the compiler. So this specialization is - * provided to simplify obtaining a substr from a char*. Being a - * function has the advantage of highlighting the strlen() cost. - * - * @see to_csubstr - * @see For a more detailed explanation on why the overloads cannot - * coexist, see http://cplusplus.bordoon.com/specializeForCharacterArrays.html */ -inline substr to_substr(char *s) -{ - return substr(s, s ? strlen(s) : 0); -} - -/** Because of a C++ limitation, substr cannot provide simultaneous - * overloads for constructing from a char[N] and a char*; the latter - * will always be chosen by the compiler. So this specialization is - * provided to simplify obtaining a substr from a char*. Being a - * function has the advantage of highlighting the strlen() cost. - * - * @see to_substr - * @see For a more detailed explanation on why the overloads cannot - * coexist, see http://cplusplus.bordoon.com/specializeForCharacterArrays.html */ -inline csubstr to_csubstr(char *s) -{ - return csubstr(s, s ? strlen(s) : 0); -} -/** Because of a C++ limitation, substr cannot provide simultaneous - * overloads for constructing from a const char[N] and a const char*; - * the latter will always be chosen by the compiler. So this - * specialization is provided to simplify obtaining a substr from a - * char*. Being a function has the advantage of highlighting the - * strlen() cost. 
+/** @defgroup doc_substr_adapters substr adapters * - * @overload to_csubstr - * @see to_substr - * @see For a more detailed explanation on why the overloads cannot - * coexist, see http://cplusplus.bordoon.com/specializeForCharacterArrays.html */ -inline csubstr to_csubstr(const char *s) -{ - return csubstr(s, s ? strlen(s) : 0); -} + * to_substr() and to_csubstr() is used in generic code like + * format(), and allow adding construction of substrings from new + * types like containers. + * @{ */ /** neutral version for use in generic code */ -inline csubstr to_csubstr(csubstr s) -{ - return s; -} - +C4_ALWAYS_INLINE substr to_substr(substr s) noexcept { return s; } /** neutral version for use in generic code */ -inline csubstr to_csubstr(substr s) -{ - return s; -} - +C4_ALWAYS_INLINE csubstr to_csubstr(substr s) noexcept { return s; } /** neutral version for use in generic code */ -inline substr to_substr(substr s) -{ - return s; -} +C4_ALWAYS_INLINE csubstr to_csubstr(csubstr s) noexcept { return s; } + + +template +C4_ALWAYS_INLINE substr +to_substr(char (&s)[N]) noexcept { substr ss(s, N-1); return ss; } +template +C4_ALWAYS_INLINE csubstr +to_csubstr(const char (&s)[N]) noexcept { csubstr ss(s, N-1); return ss; } + + +/** @note this overload uses SFINAE to prevent it from overriding the array overload + * @see For a more detailed explanation on why the plain overloads cannot + * coexist, see http://cplusplus.bordoon.com/specializeForCharacterArrays.html */ +template +C4_ALWAYS_INLINE typename std::enable_if::value, substr>::type +to_substr(U s) noexcept { substr ss(s); return ss; } +/** @note this overload uses SFINAE to prevent it from overriding the array overload + * @see For a more detailed explanation on why the plain overloads cannot + * coexist, see http://cplusplus.bordoon.com/specializeForCharacterArrays.html */ +template +C4_ALWAYS_INLINE typename std::enable_if::value || std::is_same::value, csubstr>::type +to_csubstr(U s) noexcept { csubstr 
ss(s); return ss; } + + +/** @} */ //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- -template inline bool operator== (const C (&s)[N], basic_substring const that) { return that.compare(s) == 0; } -template inline bool operator!= (const C (&s)[N], basic_substring const that) { return that.compare(s) != 0; } -template inline bool operator< (const C (&s)[N], basic_substring const that) { return that.compare(s) > 0; } -template inline bool operator> (const C (&s)[N], basic_substring const that) { return that.compare(s) < 0; } -template inline bool operator<= (const C (&s)[N], basic_substring const that) { return that.compare(s) >= 0; } -template inline bool operator>= (const C (&s)[N], basic_substring const that) { return that.compare(s) <= 0; } +/** @defgroup doc_substr_cmp substr comparison operators + * @{ */ + +template inline bool operator== (const char (&s)[N], basic_substring const that) noexcept { return that.compare(s, N-1) == 0; } +template inline bool operator!= (const char (&s)[N], basic_substring const that) noexcept { return that.compare(s, N-1) != 0; } +template inline bool operator< (const char (&s)[N], basic_substring const that) noexcept { return that.compare(s, N-1) > 0; } +template inline bool operator> (const char (&s)[N], basic_substring const that) noexcept { return that.compare(s, N-1) < 0; } +template inline bool operator<= (const char (&s)[N], basic_substring const that) noexcept { return that.compare(s, N-1) >= 0; } +template inline bool operator>= (const char (&s)[N], basic_substring const that) noexcept { return that.compare(s, N-1) <= 0; } -template inline bool operator== (C const c, basic_substring const that) { return that.compare(c) == 0; } -template inline bool operator!= (C const c, basic_substring const that) { return that.compare(c) != 0; } 
-template inline bool operator< (C const c, basic_substring const that) { return that.compare(c) > 0; } -template inline bool operator> (C const c, basic_substring const that) { return that.compare(c) < 0; } -template inline bool operator<= (C const c, basic_substring const that) { return that.compare(c) >= 0; } -template inline bool operator>= (C const c, basic_substring const that) { return that.compare(c) <= 0; } +template inline bool operator== (const char c, basic_substring const that) noexcept { return that.compare(c) == 0; } +template inline bool operator!= (const char c, basic_substring const that) noexcept { return that.compare(c) != 0; } +template inline bool operator< (const char c, basic_substring const that) noexcept { return that.compare(c) > 0; } +template inline bool operator> (const char c, basic_substring const that) noexcept { return that.compare(c) < 0; } +template inline bool operator<= (const char c, basic_substring const that) noexcept { return that.compare(c) >= 0; } +template inline bool operator>= (const char c, basic_substring const that) noexcept { return that.compare(c) <= 0; } + +/** @} */ //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- -/** @define C4_SUBSTR_NO_OSTREAM_LSHIFT doctest does not deal well with +/* C4_SUBSTR_NO_OSTREAM_LSHIFT doctest does not deal well with * template operator<< * @see https://github.com/onqtam/doctest/pull/431 */ #ifndef C4_SUBSTR_NO_OSTREAM_LSHIFT @@ -7342,6 +7555,8 @@ inline OStream& operator<< (OStream& os, basic_substring s) #endif #endif // !C4_SUBSTR_NO_OSTREAM_LSHIFT +/** @} */ + } // namespace c4 @@ -7368,142 +7583,186 @@ inline OStream& operator<< (OStream& os, basic_substring s) #ifndef _C4_EXT_FAST_FLOAT_HPP_ #define _C4_EXT_FAST_FLOAT_HPP_ -#ifdef _MSC_VER +#if defined(_MSC_VER) && !defined(__clang__) # 
pragma warning(push) +# pragma warning(disable: 4365) // '=': conversion from 'const _Ty' to 'fast_float::limb', signed/unsigned mismatch # pragma warning(disable: 4996) // snprintf/scanf: this function or variable may be unsafe #elif defined(__clang__) || defined(__APPLE_CC__) || defined(_LIBCPP_VERSION) # pragma clang diagnostic push -# if (defined(__clang_major__) && _clang_major__ >= 9) || defined(__APPLE_CC__) +# if (defined(__clang_major__) && (__clang_major__ >= 9)) || defined(__APPLE_CC__) # pragma clang diagnostic ignored "-Wfortify-source" # endif # pragma clang diagnostic ignored "-Wshift-count-overflow" +# pragma clang diagnostic ignored "-Wold-style-cast" #elif defined(__GNUC__) # pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wnarrowing" +# pragma GCC diagnostic ignored "-Wconversion" # pragma GCC diagnostic ignored "-Wuseless-cast" +# pragma GCC diagnostic ignored "-Wold-style-cast" #endif // fast_float by Daniel Lemire // fast_float by João Paulo Magalhaes - - +// +// // with contributions from Eugene Golushkov // with contributions from Maksim Kita // with contributions from Marcin Wojdyr // with contributions from Neal Richardson // with contributions from Tim Paine // with contributions from Fabio Pellacini +// with contributions from Lénárd Szolnoki +// with contributions from Jan Pharago +// with contributions from Maya Warrier +// with contributions from Taha Khokhar +// +// +// MIT License Notice +// +// MIT License +// +// Copyright (c) 2021 The fast_float authors +// +// Permission is hereby granted, free of charge, to any +// person obtaining a copy of this software and associated +// documentation files (the "Software"), to deal in the +// Software without restriction, including without +// limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of +// the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following +// 
conditions: +// +// The above copyright notice and this permission notice +// shall be included in all copies or substantial portions +// of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +// ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +// TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +// PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +// SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +// IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. +// +#ifndef FASTFLOAT_CONSTEXPR_FEATURE_DETECT_H +#define FASTFLOAT_CONSTEXPR_FEATURE_DETECT_H -// Permission is hereby granted, free of charge, to any -// person obtaining a copy of this software and associated -// documentation files (the "Software"), to deal in the -// Software without restriction, including without -// limitation the rights to use, copy, modify, merge, -// publish, distribute, sublicense, and/or sell copies of -// the Software, and to permit persons to whom the Software -// is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice -// shall be included in all copies or substantial portions -// of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF -// ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -// TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A -// PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT -// SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR -// IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS IN THE SOFTWARE. 
+#ifdef __has_include +#if __has_include() +#include +#endif +#endif +// Testing for https://wg21.link/N3652, adopted in C++14 +#if __cpp_constexpr >= 201304 +#define FASTFLOAT_CONSTEXPR14 constexpr +#else +#define FASTFLOAT_CONSTEXPR14 +#endif -#ifndef FASTFLOAT_FAST_FLOAT_H -#define FASTFLOAT_FAST_FLOAT_H +#if defined(__cpp_lib_bit_cast) && __cpp_lib_bit_cast >= 201806L +#define FASTFLOAT_HAS_BIT_CAST 1 +#else +#define FASTFLOAT_HAS_BIT_CAST 0 +#endif + +#if defined(__cpp_lib_is_constant_evaluated) && __cpp_lib_is_constant_evaluated >= 201811L +#define FASTFLOAT_HAS_IS_CONSTANT_EVALUATED 1 +#else +#define FASTFLOAT_HAS_IS_CONSTANT_EVALUATED 0 +#endif + +// Testing for relevant C++20 constexpr library features +#if FASTFLOAT_HAS_IS_CONSTANT_EVALUATED \ + && FASTFLOAT_HAS_BIT_CAST \ + && __cpp_lib_constexpr_algorithms >= 201806L /*For std::copy and std::fill*/ +#define FASTFLOAT_CONSTEXPR20 constexpr +#define FASTFLOAT_IS_CONSTEXPR 1 +#else +#define FASTFLOAT_CONSTEXPR20 +#define FASTFLOAT_IS_CONSTEXPR 0 +#endif +#endif // FASTFLOAT_CONSTEXPR_FEATURE_DETECT_H + +#ifndef FASTFLOAT_FLOAT_COMMON_H +#define FASTFLOAT_FLOAT_COMMON_H + +#include +//included above: +//#include +#include +//included above: +//#include +//included above: +//#include #include +#ifdef __has_include + #if __has_include() && (__cplusplus > 202002L || _MSVC_LANG > 202002L) + #include + #endif +#endif namespace fast_float { + +#define FASTFLOAT_JSONFMT (1 << 5) +#define FASTFLOAT_FORTRANFMT (1 << 6) + enum chars_format { - scientific = 1<<0, - fixed = 1<<2, - hex = 1<<3, - general = fixed | scientific + scientific = 1 << 0, + fixed = 1 << 2, + hex = 1 << 3, + no_infnan = 1 << 4, + // RFC 8259: https://datatracker.ietf.org/doc/html/rfc8259#section-6 + json = FASTFLOAT_JSONFMT | fixed | scientific | no_infnan, + // Extension of RFC 8259 where, e.g., "inf" and "nan" are allowed. 
+ json_or_infnan = FASTFLOAT_JSONFMT | fixed | scientific, + fortran = FASTFLOAT_FORTRANFMT | fixed | scientific, + general = fixed | scientific }; - -struct from_chars_result { - const char *ptr; +template +struct from_chars_result_t { + UC const* ptr; std::errc ec; }; +using from_chars_result = from_chars_result_t; -struct parse_options { - constexpr explicit parse_options(chars_format fmt = chars_format::general, - char dot = '.') +template +struct parse_options_t { + constexpr explicit parse_options_t(chars_format fmt = chars_format::general, + UC dot = UC('.')) : format(fmt), decimal_point(dot) {} /** Which number formats are accepted */ chars_format format; /** The character used as decimal point */ - char decimal_point; + UC decimal_point; }; - -/** - * This function parses the character sequence [first,last) for a number. It parses floating-point numbers expecting - * a locale-indepent format equivalent to what is used by std::strtod in the default ("C") locale. - * The resulting floating-point value is the closest floating-point values (using either float or double), - * using the "round to even" convention for values that would otherwise fall right in-between two values. - * That is, we provide exact parsing according to the IEEE standard. - * - * Given a successful parse, the pointer (`ptr`) in the returned value is set to point right after the - * parsed number, and the `value` referenced is set to the parsed value. In case of error, the returned - * `ec` contains a representative error, otherwise the default (`std::errc()`) value is stored. - * - * The implementation does not throw and does not allocate memory (e.g., with `new` or `malloc`). - * - * Like the C++17 standard, the `fast_float::from_chars` functions take an optional last argument of - * the type `fast_float::chars_format`. 
It is a bitset value: we check whether - * `fmt & fast_float::chars_format::fixed` and `fmt & fast_float::chars_format::scientific` are set - * to determine whether we allowe the fixed point and scientific notation respectively. - * The default is `fast_float::chars_format::general` which allows both `fixed` and `scientific`. - */ -template -from_chars_result from_chars(const char *first, const char *last, - T &value, chars_format fmt = chars_format::general) noexcept; - -/** - * Like from_chars, but accepts an `options` argument to govern number parsing. - */ -template -from_chars_result from_chars_advanced(const char *first, const char *last, - T &value, parse_options options) noexcept; +using parse_options = parse_options_t; } -#endif // FASTFLOAT_FAST_FLOAT_H - - -#ifndef FASTFLOAT_FLOAT_COMMON_H -#define FASTFLOAT_FLOAT_COMMON_H -#include -//included above: -//#include -#include -//included above: -//#include +#if FASTFLOAT_HAS_BIT_CAST +#include +#endif #if (defined(__x86_64) || defined(__x86_64__) || defined(_M_X64) \ || defined(__amd64) || defined(__aarch64__) || defined(_M_ARM64) \ || defined(__MINGW64__) \ || defined(__s390x__) \ || (defined(__ppc64__) || defined(__PPC64__) || defined(__ppc64le__) || defined(__PPC64LE__)) \ - || defined(__EMSCRIPTEN__)) -#define FASTFLOAT_64BIT + || defined(__loongarch64) ) +#define FASTFLOAT_64BIT 1 #elif (defined(__i386) || defined(__i386__) || defined(_M_IX86) \ - || defined(__arm__) || defined(_M_ARM) \ - || defined(__MINGW32__)) -#define FASTFLOAT_32BIT + || defined(__arm__) || defined(_M_ARM) || defined(__ppc__) \ + || defined(__MINGW32__) || defined(__EMSCRIPTEN__)) +#define FASTFLOAT_32BIT 1 #else // Need to check incrementally, since SIZE_MAX is a size_t, avoid overflow. // We can never tell the register width, but the SIZE_MAX is a good approximation. 
@@ -7511,9 +7770,9 @@ from_chars_result from_chars_advanced(const char *first, const char *last, #if SIZE_MAX == 0xffff #error Unknown platform (16-bit, unsupported) #elif SIZE_MAX == 0xffffffff - #define FASTFLOAT_32BIT + #define FASTFLOAT_32BIT 1 #elif SIZE_MAX == 0xffffffffffffffff - #define FASTFLOAT_64BIT + #define FASTFLOAT_64BIT 1 #else #error Unknown platform (not 32-bit, not 64-bit?) #endif @@ -7528,15 +7787,23 @@ from_chars_result from_chars_advanced(const char *first, const char *last, #define FASTFLOAT_VISUAL_STUDIO 1 #endif -#ifdef _WIN32 +#if defined __BYTE_ORDER__ && defined __ORDER_BIG_ENDIAN__ +#define FASTFLOAT_IS_BIG_ENDIAN (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) +#elif defined _WIN32 #define FASTFLOAT_IS_BIG_ENDIAN 0 #else #if defined(__APPLE__) || defined(__FreeBSD__) #include #elif defined(sun) || defined(__sun) #include +#elif defined(__MVS__) +#include #else +#ifdef __has_include +#if __has_include() #include +#endif //__has_include() +#endif //__has_include #endif # #ifndef __BYTE_ORDER__ @@ -7556,6 +7823,38 @@ from_chars_result from_chars_advanced(const char *first, const char *last, #endif #endif +#if defined(__SSE2__) || \ + (defined(FASTFLOAT_VISUAL_STUDIO) && \ + (defined(_M_AMD64) || defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP == 2))) +#define FASTFLOAT_SSE2 1 +#endif + +#if defined(__aarch64__) || defined(_M_ARM64) +#define FASTFLOAT_NEON 1 +#endif + +#if defined(FASTFLOAT_SSE2) || defined(FASTFLOAT_NEON) +#define FASTFLOAT_HAS_SIMD 1 +#endif + +#if defined(__GNUC__) +// disable -Wcast-align=strict (GCC only) +#define FASTFLOAT_SIMD_DISABLE_WARNINGS \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wcast-align\"") +#else +#define FASTFLOAT_SIMD_DISABLE_WARNINGS +#endif + +#if defined(__GNUC__) +#define FASTFLOAT_SIMD_RESTORE_WARNINGS \ + _Pragma("GCC diagnostic pop") +#else +#define FASTFLOAT_SIMD_RESTORE_WARNINGS +#endif + + + #ifdef FASTFLOAT_VISUAL_STUDIO #define fastfloat_really_inline 
__forceinline #else @@ -7563,26 +7862,57 @@ from_chars_result from_chars_advanced(const char *first, const char *last, #endif #ifndef FASTFLOAT_ASSERT -#define FASTFLOAT_ASSERT(x) { if (!(x)) abort(); } +#define FASTFLOAT_ASSERT(x) { ((void)(x)); } #endif #ifndef FASTFLOAT_DEBUG_ASSERT -//included above: -//#include -#define FASTFLOAT_DEBUG_ASSERT(x) assert(x) +#define FASTFLOAT_DEBUG_ASSERT(x) { ((void)(x)); } #endif // rust style `try!()` macro, or `?` operator #define FASTFLOAT_TRY(x) { if (!(x)) return false; } +#define FASTFLOAT_ENABLE_IF(...) typename std::enable_if<(__VA_ARGS__), int>::type + + namespace fast_float { +fastfloat_really_inline constexpr bool cpp20_and_in_constexpr() { +#if FASTFLOAT_HAS_IS_CONSTANT_EVALUATED + return std::is_constant_evaluated(); +#else + return false; +#endif +} + +template +fastfloat_really_inline constexpr bool is_supported_float_type() { + return std::is_same::value || std::is_same::value +#if __STDCPP_FLOAT32_T__ + || std::is_same::value +#endif +#if __STDCPP_FLOAT64_T__ + || std::is_same::value +#endif + ; +} + +template +fastfloat_really_inline constexpr bool is_supported_char_type() { + return + std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value; +} + // Compares two ASCII strings in a case insensitive manner. 
-inline bool fastfloat_strncasecmp(const char *input1, const char *input2, - size_t length) { +template +inline FASTFLOAT_CONSTEXPR14 bool +fastfloat_strncasecmp(UC const * input1, UC const * input2, size_t length) { char running_diff{0}; - for (size_t i = 0; i < length; i++) { - running_diff |= (input1[i] ^ input2[i]); + for (size_t i = 0; i < length; ++i) { + running_diff |= (char(input1[i]) ^ char(input2[i])); } return (running_diff == 0) || (running_diff == 32); } @@ -7596,14 +7926,14 @@ template struct span { const T* ptr; size_t length; - span(const T* _ptr, size_t _length) : ptr(_ptr), length(_length) {} - span() : ptr(nullptr), length(0) {} + constexpr span(const T* _ptr, size_t _length) : ptr(_ptr), length(_length) {} + constexpr span() : ptr(nullptr), length(0) {} constexpr size_t len() const noexcept { return length; } - const T& operator[](size_t index) const noexcept { + FASTFLOAT_CONSTEXPR14 const T& operator[](size_t index) const noexcept { FASTFLOAT_DEBUG_ASSERT(index < length); return ptr[index]; } @@ -7612,13 +7942,29 @@ struct span { struct value128 { uint64_t low; uint64_t high; - value128(uint64_t _low, uint64_t _high) : low(_low), high(_high) {} - value128() : low(0), high(0) {} + constexpr value128(uint64_t _low, uint64_t _high) : low(_low), high(_high) {} + constexpr value128() : low(0), high(0) {} }; +/* Helper C++14 constexpr generic implementation of leading_zeroes */ +fastfloat_really_inline FASTFLOAT_CONSTEXPR14 +int leading_zeroes_generic(uint64_t input_num, int last_bit = 0) { + if(input_num & uint64_t(0xffffffff00000000)) { input_num >>= 32; last_bit |= 32; } + if(input_num & uint64_t( 0xffff0000)) { input_num >>= 16; last_bit |= 16; } + if(input_num & uint64_t( 0xff00)) { input_num >>= 8; last_bit |= 8; } + if(input_num & uint64_t( 0xf0)) { input_num >>= 4; last_bit |= 4; } + if(input_num & uint64_t( 0xc)) { input_num >>= 2; last_bit |= 2; } + if(input_num & uint64_t( 0x2)) { /* input_num >>= 1; */ last_bit |= 1; } + return 63 - 
last_bit; +} + /* result might be undefined when input_num is zero */ -fastfloat_really_inline int leading_zeroes(uint64_t input_num) { +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 +int leading_zeroes(uint64_t input_num) { assert(input_num > 0); + if (cpp20_and_in_constexpr()) { + return leading_zeroes_generic(input_num); + } #ifdef FASTFLOAT_VISUAL_STUDIO #if defined(_M_X64) || defined(_M_ARM64) unsigned long leading_zero = 0; @@ -7627,61 +7973,65 @@ fastfloat_really_inline int leading_zeroes(uint64_t input_num) { _BitScanReverse64(&leading_zero, input_num); return (int)(63 - leading_zero); #else - int last_bit = 0; - if(input_num & uint64_t(0xffffffff00000000)) input_num >>= 32, last_bit |= 32; - if(input_num & uint64_t( 0xffff0000)) input_num >>= 16, last_bit |= 16; - if(input_num & uint64_t( 0xff00)) input_num >>= 8, last_bit |= 8; - if(input_num & uint64_t( 0xf0)) input_num >>= 4, last_bit |= 4; - if(input_num & uint64_t( 0xc)) input_num >>= 2, last_bit |= 2; - if(input_num & uint64_t( 0x2)) input_num >>= 1, last_bit |= 1; - return 63 - last_bit; + return leading_zeroes_generic(input_num); #endif #else return __builtin_clzll(input_num); #endif } -#ifdef FASTFLOAT_32BIT - // slow emulation routine for 32-bit -fastfloat_really_inline uint64_t emulu(uint32_t x, uint32_t y) { +fastfloat_really_inline constexpr uint64_t emulu(uint32_t x, uint32_t y) { return x * (uint64_t)y; } -// slow emulation routine for 32-bit -#if !defined(__MINGW64__) -fastfloat_really_inline uint64_t _umul128(uint64_t ab, uint64_t cd, - uint64_t *hi) { +fastfloat_really_inline FASTFLOAT_CONSTEXPR14 +uint64_t umul128_generic(uint64_t ab, uint64_t cd, uint64_t *hi) { uint64_t ad = emulu((uint32_t)(ab >> 32), (uint32_t)cd); uint64_t bd = emulu((uint32_t)ab, (uint32_t)cd); uint64_t adbc = ad + emulu((uint32_t)ab, (uint32_t)(cd >> 32)); - uint64_t adbc_carry = !!(adbc < ad); + uint64_t adbc_carry = (uint64_t)(adbc < ad); uint64_t lo = bd + (adbc << 32); *hi = emulu((uint32_t)(ab >> 32), 
(uint32_t)(cd >> 32)) + (adbc >> 32) + - (adbc_carry << 32) + !!(lo < bd); + (adbc_carry << 32) + (uint64_t)(lo < bd); return lo; } + +#ifdef FASTFLOAT_32BIT + +// slow emulation routine for 32-bit +#if !defined(__MINGW64__) +fastfloat_really_inline FASTFLOAT_CONSTEXPR14 +uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { + return umul128_generic(ab, cd, hi); +} #endif // !__MINGW64__ #endif // FASTFLOAT_32BIT // compute 64-bit a*b -fastfloat_really_inline value128 full_multiplication(uint64_t a, - uint64_t b) { +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 +value128 full_multiplication(uint64_t a, uint64_t b) { + if (cpp20_and_in_constexpr()) { + value128 answer; + answer.low = umul128_generic(a, b, &answer.high); + return answer; + } value128 answer; -#ifdef _M_ARM64 +#if defined(_M_ARM64) && !defined(__MINGW32__) // ARM64 has native support for 64-bit multiplications, no need to emulate + // But MinGW on ARM64 doesn't have native support for 64-bit multiplications answer.high = __umulh(a, b); answer.low = a * b; #elif defined(FASTFLOAT_32BIT) || (defined(_WIN64) && !defined(__clang__)) answer.low = _umul128(a, b, &answer.high); // _umul128 not available on ARM64 -#elif defined(FASTFLOAT_64BIT) +#elif defined(FASTFLOAT_64BIT) && defined(__SIZEOF_INT128__) __uint128_t r = ((__uint128_t)a) * b; answer.low = uint64_t(r); answer.high = uint64_t(r >> 64); #else - #error Not implemented + answer.low = umul128_generic(a, b, &answer.high); #endif return answer; } @@ -7690,10 +8040,10 @@ struct adjusted_mantissa { uint64_t mantissa{0}; int32_t power2{0}; // a negative value indicates an invalid result adjusted_mantissa() = default; - bool operator==(const adjusted_mantissa &o) const { + constexpr bool operator==(const adjusted_mantissa &o) const { return mantissa == o.mantissa && power2 == o.power2; } - bool operator!=(const adjusted_mantissa &o) const { + constexpr bool operator!=(const adjusted_mantissa &o) const { return mantissa != o.mantissa || power2 != 
o.power2; } }; @@ -7701,28 +8051,119 @@ struct adjusted_mantissa { // Bias so we can get the real exponent with an invalid adjusted_mantissa. constexpr static int32_t invalid_am_bias = -0x8000; -constexpr static double powers_of_ten_double[] = { - 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, - 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22}; -constexpr static float powers_of_ten_float[] = {1e0, 1e1, 1e2, 1e3, 1e4, 1e5, - 1e6, 1e7, 1e8, 1e9, 1e10}; +// used for binary_format_lookup_tables::max_mantissa +constexpr uint64_t constant_55555 = 5 * 5 * 5 * 5 * 5; + +template +struct binary_format_lookup_tables; + +template struct binary_format : binary_format_lookup_tables { + using equiv_uint = typename std::conditional::type; -template struct binary_format { static inline constexpr int mantissa_explicit_bits(); static inline constexpr int minimum_exponent(); static inline constexpr int infinite_power(); static inline constexpr int sign_index(); - static inline constexpr int min_exponent_fast_path(); + static inline constexpr int min_exponent_fast_path(); // used when fegetround() == FE_TONEAREST static inline constexpr int max_exponent_fast_path(); static inline constexpr int max_exponent_round_to_even(); static inline constexpr int min_exponent_round_to_even(); - static inline constexpr uint64_t max_mantissa_fast_path(); + static inline constexpr uint64_t max_mantissa_fast_path(int64_t power); + static inline constexpr uint64_t max_mantissa_fast_path(); // used when fegetround() == FE_TONEAREST static inline constexpr int largest_power_of_ten(); static inline constexpr int smallest_power_of_ten(); static inline constexpr T exact_power_of_ten(int64_t power); static inline constexpr size_t max_digits(); + static inline constexpr equiv_uint exponent_mask(); + static inline constexpr equiv_uint mantissa_mask(); + static inline constexpr equiv_uint hidden_bit_mask(); +}; + +template +struct binary_format_lookup_tables { + static constexpr 
double powers_of_ten[] = { + 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, + 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22}; + + // Largest integer value v so that (5**index * v) <= 1<<53. + // 0x10000000000000 == 1 << 53 + static constexpr uint64_t max_mantissa[] = { + 0x10000000000000, + 0x10000000000000 / 5, + 0x10000000000000 / (5 * 5), + 0x10000000000000 / (5 * 5 * 5), + 0x10000000000000 / (5 * 5 * 5 * 5), + 0x10000000000000 / (constant_55555), + 0x10000000000000 / (constant_55555 * 5), + 0x10000000000000 / (constant_55555 * 5 * 5), + 0x10000000000000 / (constant_55555 * 5 * 5 * 5), + 0x10000000000000 / (constant_55555 * 5 * 5 * 5 * 5), + 0x10000000000000 / (constant_55555 * constant_55555), + 0x10000000000000 / (constant_55555 * constant_55555 * 5), + 0x10000000000000 / (constant_55555 * constant_55555 * 5 * 5), + 0x10000000000000 / (constant_55555 * constant_55555 * 5 * 5 * 5), + 0x10000000000000 / (constant_55555 * constant_55555 * constant_55555), + 0x10000000000000 / (constant_55555 * constant_55555 * constant_55555 * 5), + 0x10000000000000 / (constant_55555 * constant_55555 * constant_55555 * 5 * 5), + 0x10000000000000 / (constant_55555 * constant_55555 * constant_55555 * 5 * 5 * 5), + 0x10000000000000 / (constant_55555 * constant_55555 * constant_55555 * 5 * 5 * 5 * 5), + 0x10000000000000 / (constant_55555 * constant_55555 * constant_55555 * constant_55555), + 0x10000000000000 / (constant_55555 * constant_55555 * constant_55555 * constant_55555 * 5), + 0x10000000000000 / (constant_55555 * constant_55555 * constant_55555 * constant_55555 * 5 * 5), + 0x10000000000000 / (constant_55555 * constant_55555 * constant_55555 * constant_55555 * 5 * 5 * 5), + 0x10000000000000 / (constant_55555 * constant_55555 * constant_55555 * constant_55555 * 5 * 5 * 5 * 5)}; }; +template +constexpr double binary_format_lookup_tables::powers_of_ten[]; + +template +constexpr uint64_t binary_format_lookup_tables::max_mantissa[]; + +template 
+struct binary_format_lookup_tables { + static constexpr float powers_of_ten[] = {1e0f, 1e1f, 1e2f, 1e3f, 1e4f, 1e5f, + 1e6f, 1e7f, 1e8f, 1e9f, 1e10f}; + + // Largest integer value v so that (5**index * v) <= 1<<24. + // 0x1000000 == 1<<24 + static constexpr uint64_t max_mantissa[] = { + 0x1000000, + 0x1000000 / 5, + 0x1000000 / (5 * 5), + 0x1000000 / (5 * 5 * 5), + 0x1000000 / (5 * 5 * 5 * 5), + 0x1000000 / (constant_55555), + 0x1000000 / (constant_55555 * 5), + 0x1000000 / (constant_55555 * 5 * 5), + 0x1000000 / (constant_55555 * 5 * 5 * 5), + 0x1000000 / (constant_55555 * 5 * 5 * 5 * 5), + 0x1000000 / (constant_55555 * constant_55555), + 0x1000000 / (constant_55555 * constant_55555 * 5)}; +}; + +template +constexpr float binary_format_lookup_tables::powers_of_ten[]; + +template +constexpr uint64_t binary_format_lookup_tables::max_mantissa[]; + +template <> inline constexpr int binary_format::min_exponent_fast_path() { +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + return 0; +#else + return -22; +#endif +} + +template <> inline constexpr int binary_format::min_exponent_fast_path() { +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + return 0; +#else + return -10; +#endif +} + template <> inline constexpr int binary_format::mantissa_explicit_bits() { return 52; } @@ -7763,21 +8204,6 @@ template <> inline constexpr int binary_format::infinite_power() { template <> inline constexpr int binary_format::sign_index() { return 63; } template <> inline constexpr int binary_format::sign_index() { return 31; } -template <> inline constexpr int binary_format::min_exponent_fast_path() { -#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) - return 0; -#else - return -22; -#endif -} -template <> inline constexpr int binary_format::min_exponent_fast_path() { -#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) - return 0; -#else - return -10; -#endif -} - template <> inline constexpr int binary_format::max_exponent_fast_path() { return 22; } @@ -7788,18 +8214,33 
@@ template <> inline constexpr int binary_format::max_exponent_fast_path() template <> inline constexpr uint64_t binary_format::max_mantissa_fast_path() { return uint64_t(2) << mantissa_explicit_bits(); } +template <> inline constexpr uint64_t binary_format::max_mantissa_fast_path(int64_t power) { + // caller is responsible to ensure that + // power >= 0 && power <= 22 + // + // Work around clang bug https://godbolt.org/z/zedh7rrhc + return (void)max_mantissa[0], max_mantissa[power]; +} template <> inline constexpr uint64_t binary_format::max_mantissa_fast_path() { return uint64_t(2) << mantissa_explicit_bits(); } +template <> inline constexpr uint64_t binary_format::max_mantissa_fast_path(int64_t power) { + // caller is responsible to ensure that + // power >= 0 && power <= 10 + // + // Work around clang bug https://godbolt.org/z/zedh7rrhc + return (void)max_mantissa[0], max_mantissa[power]; +} template <> inline constexpr double binary_format::exact_power_of_ten(int64_t power) { - return powers_of_ten_double[power]; + // Work around clang bug https://godbolt.org/z/zedh7rrhc + return (void)powers_of_ten[0], powers_of_ten[power]; } template <> inline constexpr float binary_format::exact_power_of_ten(int64_t power) { - - return powers_of_ten_float[power]; + // Work around clang bug https://godbolt.org/z/zedh7rrhc + return (void)powers_of_ten[0], powers_of_ten[power]; } @@ -7818,7 +8259,7 @@ inline constexpr int binary_format::smallest_power_of_ten() { } template <> inline constexpr int binary_format::smallest_power_of_ten() { - return -65; + return -64; } template <> inline constexpr size_t binary_format::max_digits() { @@ -7828,29 +8269,245 @@ template <> inline constexpr size_t binary_format::max_digits() { return 114; } +template <> inline constexpr binary_format::equiv_uint + binary_format::exponent_mask() { + return 0x7F800000; +} +template <> inline constexpr binary_format::equiv_uint + binary_format::exponent_mask() { + return 0x7FF0000000000000; +} + 
+template <> inline constexpr binary_format::equiv_uint + binary_format::mantissa_mask() { + return 0x007FFFFF; +} +template <> inline constexpr binary_format::equiv_uint + binary_format::mantissa_mask() { + return 0x000FFFFFFFFFFFFF; +} + +template <> inline constexpr binary_format::equiv_uint + binary_format::hidden_bit_mask() { + return 0x00800000; +} +template <> inline constexpr binary_format::equiv_uint + binary_format::hidden_bit_mask() { + return 0x0010000000000000; +} + template -fastfloat_really_inline void to_float(bool negative, adjusted_mantissa am, T &value) { - uint64_t word = am.mantissa; - word |= uint64_t(am.power2) << binary_format::mantissa_explicit_bits(); - word = negative - ? word | (uint64_t(1) << binary_format::sign_index()) : word; -#if FASTFLOAT_IS_BIG_ENDIAN == 1 - if (std::is_same::value) { - ::memcpy(&value, (char *)&word + 4, sizeof(T)); // extract value at offset 4-7 if float on big-endian - } else { - ::memcpy(&value, &word, sizeof(T)); - } +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 +void to_float(bool negative, adjusted_mantissa am, T &value) { + using fastfloat_uint = typename binary_format::equiv_uint; + fastfloat_uint word = (fastfloat_uint)am.mantissa; + word |= fastfloat_uint(am.power2) << binary_format::mantissa_explicit_bits(); + word |= fastfloat_uint(negative) << binary_format::sign_index(); +#if FASTFLOAT_HAS_BIT_CAST + value = std::bit_cast(word); #else - // For little-endian systems: - ::memcpy(&value, &word, sizeof(T)); + ::memcpy(&value, &word, sizeof(T)); +#endif +} + +#ifdef FASTFLOAT_SKIP_WHITE_SPACE // disabled by default +template +struct space_lut { + static constexpr bool value[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; +}; + +template +constexpr bool space_lut::value[]; + +inline constexpr bool is_space(uint8_t c) { return space_lut<>::value[c]; } #endif + +template +static constexpr uint64_t int_cmp_zeros() +{ + static_assert((sizeof(UC) == 1) || (sizeof(UC) == 2) || (sizeof(UC) == 4), "Unsupported character size"); + return (sizeof(UC) == 1) ? 0x3030303030303030 : (sizeof(UC) == 2) ? (uint64_t(UC('0')) << 48 | uint64_t(UC('0')) << 32 | uint64_t(UC('0')) << 16 | UC('0')) : (uint64_t(UC('0')) << 32 | UC('0')); +} +template +static constexpr int int_cmp_len() +{ + return sizeof(uint64_t) / sizeof(UC); +} +template +static constexpr UC const * str_const_nan() +{ + return nullptr; +} +template<> +constexpr char const * str_const_nan() +{ + return "nan"; +} +template<> +constexpr wchar_t const * str_const_nan() +{ + return L"nan"; +} +template<> +constexpr char16_t const * str_const_nan() +{ + return u"nan"; +} +template<> +constexpr char32_t const * str_const_nan() +{ + return U"nan"; +} +template +static constexpr UC const * str_const_inf() +{ + return nullptr; +} +template<> +constexpr char const * str_const_inf() +{ + return "infinity"; +} +template<> +constexpr wchar_t const * str_const_inf() +{ + return L"infinity"; +} +template<> +constexpr char16_t const * str_const_inf() +{ + return u"infinity"; } +template<> +constexpr char32_t const * str_const_inf() +{ + return U"infinity"; +} + + +template +struct int_luts { + static constexpr uint8_t chdigit[] = { + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 
255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 255, 255, 255, 255, 255, 255, + 255, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 255, 255, 255, 255, 255, + 255, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 + }; + + static constexpr size_t maxdigits_u64[] = { + 64, 41, 32, 28, 25, 23, 22, 21, + 20, 19, 18, 18, 17, 17, 16, 16, + 16, 16, 15, 15, 15, 15, 14, 14, + 14, 14, 14, 14, 14, 13, 13, 13, + 13, 13, 13 + }; + + static constexpr uint64_t min_safe_u64[] = { + 9223372036854775808ull, 12157665459056928801ull, 4611686018427387904, 7450580596923828125, 4738381338321616896, + 3909821048582988049, 9223372036854775808ull, 12157665459056928801ull, 10000000000000000000ull, 5559917313492231481, + 2218611106740436992, 8650415919381337933, 2177953337809371136, 6568408355712890625, 1152921504606846976, + 2862423051509815793, 6746640616477458432, 15181127029874798299ull, 1638400000000000000, 3243919932521508681, + 6221821273427820544, 11592836324538749809ull, 876488338465357824, 1490116119384765625, 2481152873203736576, + 
4052555153018976267, 6502111422497947648, 10260628712958602189ull, 15943230000000000000ull, 787662783788549761, + 1152921504606846976, 1667889514952984961, 2386420683693101056, 3379220508056640625, 4738381338321616896 + }; +}; + +template +constexpr uint8_t int_luts::chdigit[]; + +template +constexpr size_t int_luts::maxdigits_u64[]; + +template +constexpr uint64_t int_luts::min_safe_u64[]; + +template +fastfloat_really_inline +constexpr uint8_t ch_to_digit(UC c) { return int_luts<>::chdigit[static_cast(c)]; } + +fastfloat_really_inline +constexpr size_t max_digits_u64(int base) { return int_luts<>::maxdigits_u64[base - 2]; } + +// If a u64 is exactly max_digits_u64() in length, this is +// the value below which it has definitely overflowed. +fastfloat_really_inline +constexpr uint64_t min_safe_u64(int base) { return int_luts<>::min_safe_u64[base - 2]; } } // namespace fast_float #endif +#ifndef FASTFLOAT_FAST_FLOAT_H +#define FASTFLOAT_FAST_FLOAT_H + + +namespace fast_float { +/** + * This function parses the character sequence [first,last) for a number. It parses floating-point numbers expecting + * a locale-indepent format equivalent to what is used by std::strtod in the default ("C") locale. + * The resulting floating-point value is the closest floating-point values (using either float or double), + * using the "round to even" convention for values that would otherwise fall right in-between two values. + * That is, we provide exact parsing according to the IEEE standard. + * + * Given a successful parse, the pointer (`ptr`) in the returned value is set to point right after the + * parsed number, and the `value` referenced is set to the parsed value. In case of error, the returned + * `ec` contains a representative error, otherwise the default (`std::errc()`) value is stored. + * + * The implementation does not throw and does not allocate memory (e.g., with `new` or `malloc`). 
+ * + * Like the C++17 standard, the `fast_float::from_chars` functions take an optional last argument of + * the type `fast_float::chars_format`. It is a bitset value: we check whether + * `fmt & fast_float::chars_format::fixed` and `fmt & fast_float::chars_format::scientific` are set + * to determine whether we allow the fixed point and scientific notation respectively. + * The default is `fast_float::chars_format::general` which allows both `fixed` and `scientific`. + */ +template())> +FASTFLOAT_CONSTEXPR20 +from_chars_result_t from_chars(UC const * first, UC const * last, + T &value, chars_format fmt = chars_format::general) noexcept; + +/** + * Like from_chars, but accepts an `options` argument to govern number parsing. + */ +template +FASTFLOAT_CONSTEXPR20 +from_chars_result_t from_chars_advanced(UC const * first, UC const * last, + T &value, parse_options_t options) noexcept; +/** +* from_chars for integer types. +*/ +template ())> +FASTFLOAT_CONSTEXPR20 +from_chars_result_t from_chars(UC const * first, UC const * last, T& value, int base = 10) noexcept; + +} // namespace fast_float +#endif // FASTFLOAT_FAST_FLOAT_H + #ifndef FASTFLOAT_ASCII_NUMBER_H #define FASTFLOAT_ASCII_NUMBER_H @@ -7861,15 +8518,39 @@ fastfloat_really_inline void to_float(bool negative, adjusted_mantissa am, T &va //included above: //#include #include +//included above: +//#include +//included above: +//#include +#ifdef FASTFLOAT_SSE2 +#include +#endif + +#ifdef FASTFLOAT_NEON +#include +#endif + namespace fast_float { +template +fastfloat_really_inline constexpr bool has_simd_opt() { +#ifdef FASTFLOAT_HAS_SIMD + return std::is_same::value; +#else + return false; +#endif +} + // Next function can be micro-optimized, but compilers are entirely // able to optimize it well. 
-fastfloat_really_inline bool is_integer(char c) noexcept { return c >= '0' && c <= '9'; } +template +fastfloat_really_inline constexpr bool is_integer(UC c) noexcept { + return !(c > UC('9') || c < UC('0')); +} -fastfloat_really_inline uint64_t byteswap(uint64_t val) { +fastfloat_really_inline constexpr uint64_t byteswap(uint64_t val) { return (val & 0xFF00000000000000) >> 56 | (val & 0x00FF000000000000) >> 40 | (val & 0x0000FF0000000000) >> 24 @@ -7880,7 +8561,18 @@ fastfloat_really_inline uint64_t byteswap(uint64_t val) { | (val & 0x00000000000000FF) << 56; } -fastfloat_really_inline uint64_t read_u64(const char *chars) { +// Read 8 UC into a u64. Truncates UC if not char. +template +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 +uint64_t read8_to_u64(const UC *chars) { + if (cpp20_and_in_constexpr() || !std::is_same::value) { + uint64_t val = 0; + for(int i = 0; i < 8; ++i) { + val |= uint64_t(uint8_t(*chars)) << (i*8); + ++chars; + } + return val; + } uint64_t val; ::memcpy(&val, chars, sizeof(uint64_t)); #if FASTFLOAT_IS_BIG_ENDIAN == 1 @@ -7890,123 +8582,284 @@ fastfloat_really_inline uint64_t read_u64(const char *chars) { return val; } -fastfloat_really_inline void write_u64(uint8_t *chars, uint64_t val) { -#if FASTFLOAT_IS_BIG_ENDIAN == 1 - // Need to read as-if the number was in little-endian order. 
- val = byteswap(val); +#ifdef FASTFLOAT_SSE2 + +fastfloat_really_inline +uint64_t simd_read8_to_u64(const __m128i data) { +FASTFLOAT_SIMD_DISABLE_WARNINGS + const __m128i packed = _mm_packus_epi16(data, data); +#ifdef FASTFLOAT_64BIT + return uint64_t(_mm_cvtsi128_si64(packed)); +#else + uint64_t value; + // Visual Studio + older versions of GCC don't support _mm_storeu_si64 + _mm_storel_epi64(reinterpret_cast<__m128i*>(&value), packed); + return value; #endif - ::memcpy(chars, &val, sizeof(uint64_t)); +FASTFLOAT_SIMD_RESTORE_WARNINGS } -// credit @aqrit -fastfloat_really_inline uint32_t parse_eight_digits_unrolled(uint64_t val) { - const uint64_t mask = 0x000000FF000000FF; - const uint64_t mul1 = 0x000F424000000064; // 100 + (1000000ULL << 32) - const uint64_t mul2 = 0x0000271000000001; // 1 + (10000ULL << 32) - val -= 0x3030303030303030; - val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8; - val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32; - return uint32_t(val); +fastfloat_really_inline +uint64_t simd_read8_to_u64(const char16_t* chars) { +FASTFLOAT_SIMD_DISABLE_WARNINGS + return simd_read8_to_u64(_mm_loadu_si128(reinterpret_cast(chars))); +FASTFLOAT_SIMD_RESTORE_WARNINGS } -fastfloat_really_inline uint32_t parse_eight_digits_unrolled(const char *chars) noexcept { - return parse_eight_digits_unrolled(read_u64(chars)); +#elif defined(FASTFLOAT_NEON) + + +fastfloat_really_inline +uint64_t simd_read8_to_u64(const uint16x8_t data) { +FASTFLOAT_SIMD_DISABLE_WARNINGS + uint8x8_t utf8_packed = vmovn_u16(data); + return vget_lane_u64(vreinterpret_u64_u8(utf8_packed), 0); +FASTFLOAT_SIMD_RESTORE_WARNINGS +} + +fastfloat_really_inline +uint64_t simd_read8_to_u64(const char16_t* chars) { +FASTFLOAT_SIMD_DISABLE_WARNINGS + return simd_read8_to_u64(vld1q_u16(reinterpret_cast(chars))); +FASTFLOAT_SIMD_RESTORE_WARNINGS +} + +#endif // FASTFLOAT_SSE2 + +// MSVC SFINAE is broken pre-VS2017 +#if defined(_MSC_VER) && _MSC_VER <= 1900 +template +#else 
+template ()) = 0> +#endif +// dummy for compile +uint64_t simd_read8_to_u64(UC const*) { + return 0; +} + +// credit @aqrit +fastfloat_really_inline FASTFLOAT_CONSTEXPR14 +uint32_t parse_eight_digits_unrolled(uint64_t val) { + const uint64_t mask = 0x000000FF000000FF; + const uint64_t mul1 = 0x000F424000000064; // 100 + (1000000ULL << 32) + const uint64_t mul2 = 0x0000271000000001; // 1 + (10000ULL << 32) + val -= 0x3030303030303030; + val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8; + val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32; + return uint32_t(val); +} + + +// Call this if chars are definitely 8 digits. +template +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 +uint32_t parse_eight_digits_unrolled(UC const * chars) noexcept { + if (cpp20_and_in_constexpr() || !has_simd_opt()) { + return parse_eight_digits_unrolled(read8_to_u64(chars)); // truncation okay + } + return parse_eight_digits_unrolled(simd_read8_to_u64(chars)); } + // credit @aqrit -fastfloat_really_inline bool is_made_of_eight_digits_fast(uint64_t val) noexcept { +fastfloat_really_inline constexpr bool is_made_of_eight_digits_fast(uint64_t val) noexcept { return !((((val + 0x4646464646464646) | (val - 0x3030303030303030)) & 0x8080808080808080)); } -fastfloat_really_inline bool is_made_of_eight_digits_fast(const char *chars) noexcept { - return is_made_of_eight_digits_fast(read_u64(chars)); + +#ifdef FASTFLOAT_HAS_SIMD + +// Call this if chars might not be 8 digits. +// Using this style (instead of is_made_of_eight_digits_fast() then parse_eight_digits_unrolled()) +// ensures we don't load SIMD registers twice. 
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 +bool simd_parse_if_eight_digits_unrolled(const char16_t* chars, uint64_t& i) noexcept { + if (cpp20_and_in_constexpr()) { + return false; + } +#ifdef FASTFLOAT_SSE2 +FASTFLOAT_SIMD_DISABLE_WARNINGS + const __m128i data = _mm_loadu_si128(reinterpret_cast(chars)); + + // (x - '0') <= 9 + // http://0x80.pl/articles/simd-parsing-int-sequences.html + const __m128i t0 = _mm_add_epi16(data, _mm_set1_epi16(32720)); + const __m128i t1 = _mm_cmpgt_epi16(t0, _mm_set1_epi16(-32759)); + + if (_mm_movemask_epi8(t1) == 0) { + i = i * 100000000 + parse_eight_digits_unrolled(simd_read8_to_u64(data)); + return true; + } + else return false; +FASTFLOAT_SIMD_RESTORE_WARNINGS +#elif defined(FASTFLOAT_NEON) +FASTFLOAT_SIMD_DISABLE_WARNINGS + const uint16x8_t data = vld1q_u16(reinterpret_cast(chars)); + + // (x - '0') <= 9 + // http://0x80.pl/articles/simd-parsing-int-sequences.html + const uint16x8_t t0 = vsubq_u16(data, vmovq_n_u16('0')); + const uint16x8_t mask = vcltq_u16(t0, vmovq_n_u16('9' - '0' + 1)); + + if (vminvq_u16(mask) == 0xFFFF) { + i = i * 100000000 + parse_eight_digits_unrolled(simd_read8_to_u64(data)); + return true; + } + else return false; +FASTFLOAT_SIMD_RESTORE_WARNINGS +#else + (void)chars; (void)i; + return false; +#endif // FASTFLOAT_SSE2 +} + +#endif // FASTFLOAT_HAS_SIMD + +// MSVC SFINAE is broken pre-VS2017 +#if defined(_MSC_VER) && _MSC_VER <= 1900 +template +#else +template ()) = 0> +#endif +// dummy for compile +bool simd_parse_if_eight_digits_unrolled(UC const*, uint64_t&) { + return 0; +} + + +template ::value) = 0> +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 +void loop_parse_if_eight_digits(const UC*& p, const UC* const pend, uint64_t& i) { + if (!has_simd_opt()) { + return; + } + while ((std::distance(p, pend) >= 8) && simd_parse_if_eight_digits_unrolled(p, i)) { // in rare cases, this will overflow, but that's ok + p += 8; + } } -typedef span byte_span; +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 
+void loop_parse_if_eight_digits(const char*& p, const char* const pend, uint64_t& i) { + // optimizes better than parse_if_eight_digits_unrolled() for UC = char. + while ((std::distance(p, pend) >= 8) && is_made_of_eight_digits_fast(read8_to_u64(p))) { + i = i * 100000000 + parse_eight_digits_unrolled(read8_to_u64(p)); // in rare cases, this will overflow, but that's ok + p += 8; + } +} -struct parsed_number_string { +template +struct parsed_number_string_t { int64_t exponent{0}; uint64_t mantissa{0}; - const char *lastmatch{nullptr}; + UC const * lastmatch{nullptr}; bool negative{false}; bool valid{false}; bool too_many_digits{false}; // contains the range of the significant digits - byte_span integer{}; // non-nullable - byte_span fraction{}; // nullable + span integer{}; // non-nullable + span fraction{}; // nullable }; +using byte_span = span; +using parsed_number_string = parsed_number_string_t; + // Assuming that you use no more than 19 digits, this will // parse an ASCII string. 
-fastfloat_really_inline -parsed_number_string parse_number_string(const char *p, const char *pend, parse_options options) noexcept { - const chars_format fmt = options.format; - const char decimal_point = options.decimal_point; +template +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 +parsed_number_string_t parse_number_string(UC const *p, UC const * pend, parse_options_t options) noexcept { + chars_format const fmt = options.format; + UC const decimal_point = options.decimal_point; - parsed_number_string answer; + parsed_number_string_t answer; answer.valid = false; answer.too_many_digits = false; - answer.negative = (*p == '-'); - if (*p == '-') { // C++17 20.19.3.(7.1) explicitly forbids '+' sign here + answer.negative = (*p == UC('-')); +#ifdef FASTFLOAT_ALLOWS_LEADING_PLUS // disabled by default + if ((*p == UC('-')) || (!(fmt & FASTFLOAT_JSONFMT) && *p == UC('+'))) { +#else + if (*p == UC('-')) { // C++17 20.19.3.(7.1) explicitly forbids '+' sign here +#endif ++p; if (p == pend) { return answer; } - if (!is_integer(*p) && (*p != decimal_point)) { // a sign must be followed by an integer or the dot - return answer; + if (fmt & FASTFLOAT_JSONFMT) { + if (!is_integer(*p)) { // a sign must be followed by an integer + return answer; + } + } else { + if (!is_integer(*p) && (*p != decimal_point)) { // a sign must be followed by an integer or the dot + return answer; + } } } - const char *const start_digits = p; + UC const * const start_digits = p; uint64_t i = 0; // an unsigned int avoids signed overflows (which are bad) - while ((std::distance(p, pend) >= 8) && is_made_of_eight_digits_fast(p)) { - i = i * 100000000 + parse_eight_digits_unrolled(p); // in rare cases, this will overflow, but that's ok - p += 8; - } while ((p != pend) && is_integer(*p)) { // a multiplication by 10 is cheaper than an arbitrary integer // multiplication i = 10 * i + - uint64_t(*p - '0'); // might overflow, we will handle the overflow later + uint64_t(*p - UC('0')); // might overflow, 
we will handle the overflow later ++p; } - const char *const end_of_integer_part = p; + UC const * const end_of_integer_part = p; int64_t digit_count = int64_t(end_of_integer_part - start_digits); - answer.integer = byte_span(start_digits, size_t(digit_count)); + answer.integer = span(start_digits, size_t(digit_count)); + if (fmt & FASTFLOAT_JSONFMT) { + // at least 1 digit in integer part, without leading zeros + if (digit_count == 0 || (start_digits[0] == UC('0') && digit_count > 1)) { + return answer; + } + } + int64_t exponent = 0; - if ((p != pend) && (*p == decimal_point)) { + const bool has_decimal_point = (p != pend) && (*p == decimal_point); + if (has_decimal_point) { ++p; - const char* before = p; + UC const * before = p; // can occur at most twice without overflowing, but let it occur more, since // for integers with many digits, digit parsing is the primary bottleneck. - while ((std::distance(p, pend) >= 8) && is_made_of_eight_digits_fast(p)) { - i = i * 100000000 + parse_eight_digits_unrolled(p); // in rare cases, this will overflow, but that's ok - p += 8; - } + loop_parse_if_eight_digits(p, pend, i); + while ((p != pend) && is_integer(*p)) { - uint8_t digit = uint8_t(*p - '0'); + uint8_t digit = uint8_t(*p - UC('0')); ++p; i = i * 10 + digit; // in rare cases, this will overflow, but that's ok } exponent = before - p; - answer.fraction = byte_span(before, size_t(p - before)); + answer.fraction = span(before, size_t(p - before)); digit_count -= exponent; } - // we must have encountered at least one integer! - if (digit_count == 0) { + if (fmt & FASTFLOAT_JSONFMT) { + // at least 1 digit in fractional part + if (has_decimal_point && exponent == 0) { + return answer; + } + } + else if (digit_count == 0) { // we must have encountered at least one integer! 
return answer; } int64_t exp_number = 0; // explicit exponential part - if ((fmt & chars_format::scientific) && (p != pend) && (('e' == *p) || ('E' == *p))) { - const char * location_of_e = p; - ++p; + if ( ((fmt & chars_format::scientific) && + (p != pend) && + ((UC('e') == *p) || (UC('E') == *p))) + || + ((fmt & FASTFLOAT_FORTRANFMT) && + (p != pend) && + ((UC('+') == *p) || (UC('-') == *p) || (UC('d') == *p) || (UC('D') == *p)))) { + UC const * location_of_e = p; + if ((UC('e') == *p) || (UC('E') == *p) || (UC('d') == *p) || (UC('D') == *p)) { + ++p; + } bool neg_exp = false; - if ((p != pend) && ('-' == *p)) { + if ((p != pend) && (UC('-') == *p)) { neg_exp = true; ++p; - } else if ((p != pend) && ('+' == *p)) { // '+' on exponent is allowed by C++17 20.19.3.(7.1) + } else if ((p != pend) && (UC('+') == *p)) { // '+' on exponent is allowed by C++17 20.19.3.(7.1) ++p; } if ((p == pend) || !is_integer(*p)) { @@ -8018,7 +8871,7 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_ p = location_of_e; } else { while ((p != pend) && is_integer(*p)) { - uint8_t digit = uint8_t(*p - '0'); + uint8_t digit = uint8_t(*p - UC('0')); if (exp_number < 0x10000000) { exp_number = 10 * exp_number + digit; } @@ -8044,11 +8897,12 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_ // We have to handle the case where we have 0.0000somenumber. // We need to be mindful of the case where we only have zeroes... // E.g., 0.000000000...000. - const char *start = start_digits; - while ((start != pend) && (*start == '0' || *start == decimal_point)) { - if(*start == '0') { digit_count --; } + UC const * start = start_digits; + while ((start != pend) && (*start == UC('0') || *start == decimal_point)) { + if(*start == UC('0')) { digit_count --; } start++; } + if (digit_count > 19) { answer.too_many_digits = true; // Let us start again, this time, avoiding overflows. 
@@ -8056,22 +8910,23 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_ // pre-tokenized spans from above. i = 0; p = answer.integer.ptr; - const char* int_end = p + answer.integer.len(); - const uint64_t minimal_nineteen_digit_integer{1000000000000000000}; - while((i < minimal_nineteen_digit_integer) && (p != int_end)) { - i = i * 10 + uint64_t(*p - '0'); + UC const* int_end = p + answer.integer.len(); + const uint64_t minimal_nineteen_digit_integer{ 1000000000000000000 }; + while ((i < minimal_nineteen_digit_integer) && (p != int_end)) { + i = i * 10 + uint64_t(*p - UC('0')); ++p; } if (i >= minimal_nineteen_digit_integer) { // We have a big integers exponent = end_of_integer_part - p + exp_number; - } else { // We have a value with a fractional component. - p = answer.fraction.ptr; - const char* frac_end = p + answer.fraction.len(); - while((i < minimal_nineteen_digit_integer) && (p != frac_end)) { - i = i * 10 + uint64_t(*p - '0'); - ++p; - } - exponent = answer.fraction.ptr - p + exp_number; + } + else { // We have a value with a fractional component. 
+ p = answer.fraction.ptr; + UC const* frac_end = p + answer.fraction.len(); + while ((i < minimal_nineteen_digit_integer) && (p != frac_end)) { + i = i * 10 + uint64_t(*p - UC('0')); + ++p; + } + exponent = answer.fraction.ptr - p + exp_number; } // We have now corrected both exponent and i, to a truncated value } @@ -8081,10 +8936,110 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_ return answer; } -} // namespace fast_float +template +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 +from_chars_result_t parse_int_string(UC const* p, UC const* pend, T& value, int base) { + from_chars_result_t answer; + + UC const* const first = p; + + bool negative = (*p == UC('-')); + if (!std::is_signed::value && negative) { + answer.ec = std::errc::invalid_argument; + answer.ptr = first; + return answer; + } +#ifdef FASTFLOAT_ALLOWS_LEADING_PLUS // disabled by default + if ((*p == UC('-')) || (*p == UC('+'))) { +#else + if (*p == UC('-')) { +#endif + ++p; + } + + UC const* const start_num = p; + + while (p!= pend && *p == UC('0')) { + ++p; + } + + const bool has_leading_zeros = p > start_num; + + UC const* const start_digits = p; + + uint64_t i = 0; + if (base == 10) { + loop_parse_if_eight_digits(p, pend, i); // use SIMD if possible + } + while (p != pend) { + uint8_t digit = ch_to_digit(*p); + if (digit >= base) { + break; + } + i = uint64_t(base) * i + digit; // might overflow, check this later + p++; + } + + size_t digit_count = size_t(p - start_digits); + + if (digit_count == 0) { + if (has_leading_zeros) { + value = 0; + answer.ec = std::errc(); + answer.ptr = p; + } + else { + answer.ec = std::errc::invalid_argument; + answer.ptr = first; + } + return answer; + } + + answer.ptr = p; + + // check u64 overflow + size_t max_digits = max_digits_u64(base); + if (digit_count > max_digits) { + answer.ec = std::errc::result_out_of_range; + return answer; + } + // this check can be eliminated for all other types, but they will all require a 
max_digits(base) equivalent + if (digit_count == max_digits && i < min_safe_u64(base)) { + answer.ec = std::errc::result_out_of_range; + return answer; + } + // check other types overflow + if (!std::is_same::value) { + if (i > uint64_t(std::numeric_limits::max()) + uint64_t(negative)) { + answer.ec = std::errc::result_out_of_range; + return answer; + } + } + + if (negative) { +#ifdef FASTFLOAT_VISUAL_STUDIO +#pragma warning(push) +#pragma warning(disable: 4146) +#endif + // this weird workaround is required because: + // - converting unsigned to signed when its value is greater than signed max is UB pre-C++23. + // - reinterpret_casting (~i + 1) would work, but it is not constexpr + // this is always optimized into a neg instruction (note: T is an integer type) + value = T(-std::numeric_limits::max() - T(i - uint64_t(std::numeric_limits::max()))); +#ifdef FASTFLOAT_VISUAL_STUDIO +#pragma warning(pop) #endif + } + else { value = T(i); } + answer.ec = std::errc(); + return answer; +} + +} // namespace fast_float + +#endif #ifndef FASTFLOAT_FAST_TABLE_H #define FASTFLOAT_FAST_TABLE_H @@ -8106,11 +9061,11 @@ namespace fast_float { */ /** - * The smallest non-zero float (binary64) is 2^−1074. + * The smallest non-zero float (binary64) is 2^-1074. * We take as input numbers of the form w x 10^q where w < 2^64. * We have that w * 10^-343 < 2^(64-344) 5^-343 < 2^-1076. * However, we have that - * (2^64-1) * 10^-342 = (2^64-1) * 2^-342 * 5^-342 > 2^−1074. + * (2^64-1) * 10^-342 = (2^64-1) * 2^-342 * 5^-342 > 2^-1074. * Thus it is possible for a number of the form w * 10^-342 where * w is a 64-bit value to be a non-zero floating-point number. 
********* @@ -8125,669 +9080,669 @@ constexpr static int smallest_power_of_five = binary_format::smallest_po constexpr static int largest_power_of_five = binary_format::largest_power_of_ten(); constexpr static int number_of_entries = 2 * (largest_power_of_five - smallest_power_of_five + 1); // Powers of five from 5^-342 all the way to 5^308 rounded toward one. -static const uint64_t power_of_five_128[number_of_entries]; +constexpr static uint64_t power_of_five_128[number_of_entries] = { + 0xeef453d6923bd65a,0x113faa2906a13b3f, + 0x9558b4661b6565f8,0x4ac7ca59a424c507, + 0xbaaee17fa23ebf76,0x5d79bcf00d2df649, + 0xe95a99df8ace6f53,0xf4d82c2c107973dc, + 0x91d8a02bb6c10594,0x79071b9b8a4be869, + 0xb64ec836a47146f9,0x9748e2826cdee284, + 0xe3e27a444d8d98b7,0xfd1b1b2308169b25, + 0x8e6d8c6ab0787f72,0xfe30f0f5e50e20f7, + 0xb208ef855c969f4f,0xbdbd2d335e51a935, + 0xde8b2b66b3bc4723,0xad2c788035e61382, + 0x8b16fb203055ac76,0x4c3bcb5021afcc31, + 0xaddcb9e83c6b1793,0xdf4abe242a1bbf3d, + 0xd953e8624b85dd78,0xd71d6dad34a2af0d, + 0x87d4713d6f33aa6b,0x8672648c40e5ad68, + 0xa9c98d8ccb009506,0x680efdaf511f18c2, + 0xd43bf0effdc0ba48,0x212bd1b2566def2, + 0x84a57695fe98746d,0x14bb630f7604b57, + 0xa5ced43b7e3e9188,0x419ea3bd35385e2d, + 0xcf42894a5dce35ea,0x52064cac828675b9, + 0x818995ce7aa0e1b2,0x7343efebd1940993, + 0xa1ebfb4219491a1f,0x1014ebe6c5f90bf8, + 0xca66fa129f9b60a6,0xd41a26e077774ef6, + 0xfd00b897478238d0,0x8920b098955522b4, + 0x9e20735e8cb16382,0x55b46e5f5d5535b0, + 0xc5a890362fddbc62,0xeb2189f734aa831d, + 0xf712b443bbd52b7b,0xa5e9ec7501d523e4, + 0x9a6bb0aa55653b2d,0x47b233c92125366e, + 0xc1069cd4eabe89f8,0x999ec0bb696e840a, + 0xf148440a256e2c76,0xc00670ea43ca250d, + 0x96cd2a865764dbca,0x380406926a5e5728, + 0xbc807527ed3e12bc,0xc605083704f5ecf2, + 0xeba09271e88d976b,0xf7864a44c633682e, + 0x93445b8731587ea3,0x7ab3ee6afbe0211d, + 0xb8157268fdae9e4c,0x5960ea05bad82964, + 0xe61acf033d1a45df,0x6fb92487298e33bd, + 0x8fd0c16206306bab,0xa5d3b6d479f8e056, + 
0xb3c4f1ba87bc8696,0x8f48a4899877186c, + 0xe0b62e2929aba83c,0x331acdabfe94de87, + 0x8c71dcd9ba0b4925,0x9ff0c08b7f1d0b14, + 0xaf8e5410288e1b6f,0x7ecf0ae5ee44dd9, + 0xdb71e91432b1a24a,0xc9e82cd9f69d6150, + 0x892731ac9faf056e,0xbe311c083a225cd2, + 0xab70fe17c79ac6ca,0x6dbd630a48aaf406, + 0xd64d3d9db981787d,0x92cbbccdad5b108, + 0x85f0468293f0eb4e,0x25bbf56008c58ea5, + 0xa76c582338ed2621,0xaf2af2b80af6f24e, + 0xd1476e2c07286faa,0x1af5af660db4aee1, + 0x82cca4db847945ca,0x50d98d9fc890ed4d, + 0xa37fce126597973c,0xe50ff107bab528a0, + 0xcc5fc196fefd7d0c,0x1e53ed49a96272c8, + 0xff77b1fcbebcdc4f,0x25e8e89c13bb0f7a, + 0x9faacf3df73609b1,0x77b191618c54e9ac, + 0xc795830d75038c1d,0xd59df5b9ef6a2417, + 0xf97ae3d0d2446f25,0x4b0573286b44ad1d, + 0x9becce62836ac577,0x4ee367f9430aec32, + 0xc2e801fb244576d5,0x229c41f793cda73f, + 0xf3a20279ed56d48a,0x6b43527578c1110f, + 0x9845418c345644d6,0x830a13896b78aaa9, + 0xbe5691ef416bd60c,0x23cc986bc656d553, + 0xedec366b11c6cb8f,0x2cbfbe86b7ec8aa8, + 0x94b3a202eb1c3f39,0x7bf7d71432f3d6a9, + 0xb9e08a83a5e34f07,0xdaf5ccd93fb0cc53, + 0xe858ad248f5c22c9,0xd1b3400f8f9cff68, + 0x91376c36d99995be,0x23100809b9c21fa1, + 0xb58547448ffffb2d,0xabd40a0c2832a78a, + 0xe2e69915b3fff9f9,0x16c90c8f323f516c, + 0x8dd01fad907ffc3b,0xae3da7d97f6792e3, + 0xb1442798f49ffb4a,0x99cd11cfdf41779c, + 0xdd95317f31c7fa1d,0x40405643d711d583, + 0x8a7d3eef7f1cfc52,0x482835ea666b2572, + 0xad1c8eab5ee43b66,0xda3243650005eecf, + 0xd863b256369d4a40,0x90bed43e40076a82, + 0x873e4f75e2224e68,0x5a7744a6e804a291, + 0xa90de3535aaae202,0x711515d0a205cb36, + 0xd3515c2831559a83,0xd5a5b44ca873e03, + 0x8412d9991ed58091,0xe858790afe9486c2, + 0xa5178fff668ae0b6,0x626e974dbe39a872, + 0xce5d73ff402d98e3,0xfb0a3d212dc8128f, + 0x80fa687f881c7f8e,0x7ce66634bc9d0b99, + 0xa139029f6a239f72,0x1c1fffc1ebc44e80, + 0xc987434744ac874e,0xa327ffb266b56220, + 0xfbe9141915d7a922,0x4bf1ff9f0062baa8, + 0x9d71ac8fada6c9b5,0x6f773fc3603db4a9, + 0xc4ce17b399107c22,0xcb550fb4384d21d3, + 
0xf6019da07f549b2b,0x7e2a53a146606a48, + 0x99c102844f94e0fb,0x2eda7444cbfc426d, + 0xc0314325637a1939,0xfa911155fefb5308, + 0xf03d93eebc589f88,0x793555ab7eba27ca, + 0x96267c7535b763b5,0x4bc1558b2f3458de, + 0xbbb01b9283253ca2,0x9eb1aaedfb016f16, + 0xea9c227723ee8bcb,0x465e15a979c1cadc, + 0x92a1958a7675175f,0xbfacd89ec191ec9, + 0xb749faed14125d36,0xcef980ec671f667b, + 0xe51c79a85916f484,0x82b7e12780e7401a, + 0x8f31cc0937ae58d2,0xd1b2ecb8b0908810, + 0xb2fe3f0b8599ef07,0x861fa7e6dcb4aa15, + 0xdfbdcece67006ac9,0x67a791e093e1d49a, + 0x8bd6a141006042bd,0xe0c8bb2c5c6d24e0, + 0xaecc49914078536d,0x58fae9f773886e18, + 0xda7f5bf590966848,0xaf39a475506a899e, + 0x888f99797a5e012d,0x6d8406c952429603, + 0xaab37fd7d8f58178,0xc8e5087ba6d33b83, + 0xd5605fcdcf32e1d6,0xfb1e4a9a90880a64, + 0x855c3be0a17fcd26,0x5cf2eea09a55067f, + 0xa6b34ad8c9dfc06f,0xf42faa48c0ea481e, + 0xd0601d8efc57b08b,0xf13b94daf124da26, + 0x823c12795db6ce57,0x76c53d08d6b70858, + 0xa2cb1717b52481ed,0x54768c4b0c64ca6e, + 0xcb7ddcdda26da268,0xa9942f5dcf7dfd09, + 0xfe5d54150b090b02,0xd3f93b35435d7c4c, + 0x9efa548d26e5a6e1,0xc47bc5014a1a6daf, + 0xc6b8e9b0709f109a,0x359ab6419ca1091b, + 0xf867241c8cc6d4c0,0xc30163d203c94b62, + 0x9b407691d7fc44f8,0x79e0de63425dcf1d, + 0xc21094364dfb5636,0x985915fc12f542e4, + 0xf294b943e17a2bc4,0x3e6f5b7b17b2939d, + 0x979cf3ca6cec5b5a,0xa705992ceecf9c42, + 0xbd8430bd08277231,0x50c6ff782a838353, + 0xece53cec4a314ebd,0xa4f8bf5635246428, + 0x940f4613ae5ed136,0x871b7795e136be99, + 0xb913179899f68584,0x28e2557b59846e3f, + 0xe757dd7ec07426e5,0x331aeada2fe589cf, + 0x9096ea6f3848984f,0x3ff0d2c85def7621, + 0xb4bca50b065abe63,0xfed077a756b53a9, + 0xe1ebce4dc7f16dfb,0xd3e8495912c62894, + 0x8d3360f09cf6e4bd,0x64712dd7abbbd95c, + 0xb080392cc4349dec,0xbd8d794d96aacfb3, + 0xdca04777f541c567,0xecf0d7a0fc5583a0, + 0x89e42caaf9491b60,0xf41686c49db57244, + 0xac5d37d5b79b6239,0x311c2875c522ced5, + 0xd77485cb25823ac7,0x7d633293366b828b, + 0x86a8d39ef77164bc,0xae5dff9c02033197, + 
0xa8530886b54dbdeb,0xd9f57f830283fdfc, + 0xd267caa862a12d66,0xd072df63c324fd7b, + 0x8380dea93da4bc60,0x4247cb9e59f71e6d, + 0xa46116538d0deb78,0x52d9be85f074e608, + 0xcd795be870516656,0x67902e276c921f8b, + 0x806bd9714632dff6,0xba1cd8a3db53b6, + 0xa086cfcd97bf97f3,0x80e8a40eccd228a4, + 0xc8a883c0fdaf7df0,0x6122cd128006b2cd, + 0xfad2a4b13d1b5d6c,0x796b805720085f81, + 0x9cc3a6eec6311a63,0xcbe3303674053bb0, + 0xc3f490aa77bd60fc,0xbedbfc4411068a9c, + 0xf4f1b4d515acb93b,0xee92fb5515482d44, + 0x991711052d8bf3c5,0x751bdd152d4d1c4a, + 0xbf5cd54678eef0b6,0xd262d45a78a0635d, + 0xef340a98172aace4,0x86fb897116c87c34, + 0x9580869f0e7aac0e,0xd45d35e6ae3d4da0, + 0xbae0a846d2195712,0x8974836059cca109, + 0xe998d258869facd7,0x2bd1a438703fc94b, + 0x91ff83775423cc06,0x7b6306a34627ddcf, + 0xb67f6455292cbf08,0x1a3bc84c17b1d542, + 0xe41f3d6a7377eeca,0x20caba5f1d9e4a93, + 0x8e938662882af53e,0x547eb47b7282ee9c, + 0xb23867fb2a35b28d,0xe99e619a4f23aa43, + 0xdec681f9f4c31f31,0x6405fa00e2ec94d4, + 0x8b3c113c38f9f37e,0xde83bc408dd3dd04, + 0xae0b158b4738705e,0x9624ab50b148d445, + 0xd98ddaee19068c76,0x3badd624dd9b0957, + 0x87f8a8d4cfa417c9,0xe54ca5d70a80e5d6, + 0xa9f6d30a038d1dbc,0x5e9fcf4ccd211f4c, + 0xd47487cc8470652b,0x7647c3200069671f, + 0x84c8d4dfd2c63f3b,0x29ecd9f40041e073, + 0xa5fb0a17c777cf09,0xf468107100525890, + 0xcf79cc9db955c2cc,0x7182148d4066eeb4, + 0x81ac1fe293d599bf,0xc6f14cd848405530, + 0xa21727db38cb002f,0xb8ada00e5a506a7c, + 0xca9cf1d206fdc03b,0xa6d90811f0e4851c, + 0xfd442e4688bd304a,0x908f4a166d1da663, + 0x9e4a9cec15763e2e,0x9a598e4e043287fe, + 0xc5dd44271ad3cdba,0x40eff1e1853f29fd, + 0xf7549530e188c128,0xd12bee59e68ef47c, + 0x9a94dd3e8cf578b9,0x82bb74f8301958ce, + 0xc13a148e3032d6e7,0xe36a52363c1faf01, + 0xf18899b1bc3f8ca1,0xdc44e6c3cb279ac1, + 0x96f5600f15a7b7e5,0x29ab103a5ef8c0b9, + 0xbcb2b812db11a5de,0x7415d448f6b6f0e7, + 0xebdf661791d60f56,0x111b495b3464ad21, + 0x936b9fcebb25c995,0xcab10dd900beec34, + 0xb84687c269ef3bfb,0x3d5d514f40eea742, + 
0xe65829b3046b0afa,0xcb4a5a3112a5112, + 0x8ff71a0fe2c2e6dc,0x47f0e785eaba72ab, + 0xb3f4e093db73a093,0x59ed216765690f56, + 0xe0f218b8d25088b8,0x306869c13ec3532c, + 0x8c974f7383725573,0x1e414218c73a13fb, + 0xafbd2350644eeacf,0xe5d1929ef90898fa, + 0xdbac6c247d62a583,0xdf45f746b74abf39, + 0x894bc396ce5da772,0x6b8bba8c328eb783, + 0xab9eb47c81f5114f,0x66ea92f3f326564, + 0xd686619ba27255a2,0xc80a537b0efefebd, + 0x8613fd0145877585,0xbd06742ce95f5f36, + 0xa798fc4196e952e7,0x2c48113823b73704, + 0xd17f3b51fca3a7a0,0xf75a15862ca504c5, + 0x82ef85133de648c4,0x9a984d73dbe722fb, + 0xa3ab66580d5fdaf5,0xc13e60d0d2e0ebba, + 0xcc963fee10b7d1b3,0x318df905079926a8, + 0xffbbcfe994e5c61f,0xfdf17746497f7052, + 0x9fd561f1fd0f9bd3,0xfeb6ea8bedefa633, + 0xc7caba6e7c5382c8,0xfe64a52ee96b8fc0, + 0xf9bd690a1b68637b,0x3dfdce7aa3c673b0, + 0x9c1661a651213e2d,0x6bea10ca65c084e, + 0xc31bfa0fe5698db8,0x486e494fcff30a62, + 0xf3e2f893dec3f126,0x5a89dba3c3efccfa, + 0x986ddb5c6b3a76b7,0xf89629465a75e01c, + 0xbe89523386091465,0xf6bbb397f1135823, + 0xee2ba6c0678b597f,0x746aa07ded582e2c, + 0x94db483840b717ef,0xa8c2a44eb4571cdc, + 0xba121a4650e4ddeb,0x92f34d62616ce413, + 0xe896a0d7e51e1566,0x77b020baf9c81d17, + 0x915e2486ef32cd60,0xace1474dc1d122e, + 0xb5b5ada8aaff80b8,0xd819992132456ba, + 0xe3231912d5bf60e6,0x10e1fff697ed6c69, + 0x8df5efabc5979c8f,0xca8d3ffa1ef463c1, + 0xb1736b96b6fd83b3,0xbd308ff8a6b17cb2, + 0xddd0467c64bce4a0,0xac7cb3f6d05ddbde, + 0x8aa22c0dbef60ee4,0x6bcdf07a423aa96b, + 0xad4ab7112eb3929d,0x86c16c98d2c953c6, + 0xd89d64d57a607744,0xe871c7bf077ba8b7, + 0x87625f056c7c4a8b,0x11471cd764ad4972, + 0xa93af6c6c79b5d2d,0xd598e40d3dd89bcf, + 0xd389b47879823479,0x4aff1d108d4ec2c3, + 0x843610cb4bf160cb,0xcedf722a585139ba, + 0xa54394fe1eedb8fe,0xc2974eb4ee658828, + 0xce947a3da6a9273e,0x733d226229feea32, + 0x811ccc668829b887,0x806357d5a3f525f, + 0xa163ff802a3426a8,0xca07c2dcb0cf26f7, + 0xc9bcff6034c13052,0xfc89b393dd02f0b5, + 0xfc2c3f3841f17c67,0xbbac2078d443ace2, + 
0x9d9ba7832936edc0,0xd54b944b84aa4c0d, + 0xc5029163f384a931,0xa9e795e65d4df11, + 0xf64335bcf065d37d,0x4d4617b5ff4a16d5, + 0x99ea0196163fa42e,0x504bced1bf8e4e45, + 0xc06481fb9bcf8d39,0xe45ec2862f71e1d6, + 0xf07da27a82c37088,0x5d767327bb4e5a4c, + 0x964e858c91ba2655,0x3a6a07f8d510f86f, + 0xbbe226efb628afea,0x890489f70a55368b, + 0xeadab0aba3b2dbe5,0x2b45ac74ccea842e, + 0x92c8ae6b464fc96f,0x3b0b8bc90012929d, + 0xb77ada0617e3bbcb,0x9ce6ebb40173744, + 0xe55990879ddcaabd,0xcc420a6a101d0515, + 0x8f57fa54c2a9eab6,0x9fa946824a12232d, + 0xb32df8e9f3546564,0x47939822dc96abf9, + 0xdff9772470297ebd,0x59787e2b93bc56f7, + 0x8bfbea76c619ef36,0x57eb4edb3c55b65a, + 0xaefae51477a06b03,0xede622920b6b23f1, + 0xdab99e59958885c4,0xe95fab368e45eced, + 0x88b402f7fd75539b,0x11dbcb0218ebb414, + 0xaae103b5fcd2a881,0xd652bdc29f26a119, + 0xd59944a37c0752a2,0x4be76d3346f0495f, + 0x857fcae62d8493a5,0x6f70a4400c562ddb, + 0xa6dfbd9fb8e5b88e,0xcb4ccd500f6bb952, + 0xd097ad07a71f26b2,0x7e2000a41346a7a7, + 0x825ecc24c873782f,0x8ed400668c0c28c8, + 0xa2f67f2dfa90563b,0x728900802f0f32fa, + 0xcbb41ef979346bca,0x4f2b40a03ad2ffb9, + 0xfea126b7d78186bc,0xe2f610c84987bfa8, + 0x9f24b832e6b0f436,0xdd9ca7d2df4d7c9, + 0xc6ede63fa05d3143,0x91503d1c79720dbb, + 0xf8a95fcf88747d94,0x75a44c6397ce912a, + 0x9b69dbe1b548ce7c,0xc986afbe3ee11aba, + 0xc24452da229b021b,0xfbe85badce996168, + 0xf2d56790ab41c2a2,0xfae27299423fb9c3, + 0x97c560ba6b0919a5,0xdccd879fc967d41a, + 0xbdb6b8e905cb600f,0x5400e987bbc1c920, + 0xed246723473e3813,0x290123e9aab23b68, + 0x9436c0760c86e30b,0xf9a0b6720aaf6521, + 0xb94470938fa89bce,0xf808e40e8d5b3e69, + 0xe7958cb87392c2c2,0xb60b1d1230b20e04, + 0x90bd77f3483bb9b9,0xb1c6f22b5e6f48c2, + 0xb4ecd5f01a4aa828,0x1e38aeb6360b1af3, + 0xe2280b6c20dd5232,0x25c6da63c38de1b0, + 0x8d590723948a535f,0x579c487e5a38ad0e, + 0xb0af48ec79ace837,0x2d835a9df0c6d851, + 0xdcdb1b2798182244,0xf8e431456cf88e65, + 0x8a08f0f8bf0f156b,0x1b8e9ecb641b58ff, + 0xac8b2d36eed2dac5,0xe272467e3d222f3f, + 
0xd7adf884aa879177,0x5b0ed81dcc6abb0f, + 0x86ccbb52ea94baea,0x98e947129fc2b4e9, + 0xa87fea27a539e9a5,0x3f2398d747b36224, + 0xd29fe4b18e88640e,0x8eec7f0d19a03aad, + 0x83a3eeeef9153e89,0x1953cf68300424ac, + 0xa48ceaaab75a8e2b,0x5fa8c3423c052dd7, + 0xcdb02555653131b6,0x3792f412cb06794d, + 0x808e17555f3ebf11,0xe2bbd88bbee40bd0, + 0xa0b19d2ab70e6ed6,0x5b6aceaeae9d0ec4, + 0xc8de047564d20a8b,0xf245825a5a445275, + 0xfb158592be068d2e,0xeed6e2f0f0d56712, + 0x9ced737bb6c4183d,0x55464dd69685606b, + 0xc428d05aa4751e4c,0xaa97e14c3c26b886, + 0xf53304714d9265df,0xd53dd99f4b3066a8, + 0x993fe2c6d07b7fab,0xe546a8038efe4029, + 0xbf8fdb78849a5f96,0xde98520472bdd033, + 0xef73d256a5c0f77c,0x963e66858f6d4440, + 0x95a8637627989aad,0xdde7001379a44aa8, + 0xbb127c53b17ec159,0x5560c018580d5d52, + 0xe9d71b689dde71af,0xaab8f01e6e10b4a6, + 0x9226712162ab070d,0xcab3961304ca70e8, + 0xb6b00d69bb55c8d1,0x3d607b97c5fd0d22, + 0xe45c10c42a2b3b05,0x8cb89a7db77c506a, + 0x8eb98a7a9a5b04e3,0x77f3608e92adb242, + 0xb267ed1940f1c61c,0x55f038b237591ed3, + 0xdf01e85f912e37a3,0x6b6c46dec52f6688, + 0x8b61313bbabce2c6,0x2323ac4b3b3da015, + 0xae397d8aa96c1b77,0xabec975e0a0d081a, + 0xd9c7dced53c72255,0x96e7bd358c904a21, + 0x881cea14545c7575,0x7e50d64177da2e54, + 0xaa242499697392d2,0xdde50bd1d5d0b9e9, + 0xd4ad2dbfc3d07787,0x955e4ec64b44e864, + 0x84ec3c97da624ab4,0xbd5af13bef0b113e, + 0xa6274bbdd0fadd61,0xecb1ad8aeacdd58e, + 0xcfb11ead453994ba,0x67de18eda5814af2, + 0x81ceb32c4b43fcf4,0x80eacf948770ced7, + 0xa2425ff75e14fc31,0xa1258379a94d028d, + 0xcad2f7f5359a3b3e,0x96ee45813a04330, + 0xfd87b5f28300ca0d,0x8bca9d6e188853fc, + 0x9e74d1b791e07e48,0x775ea264cf55347e, + 0xc612062576589dda,0x95364afe032a819e, + 0xf79687aed3eec551,0x3a83ddbd83f52205, + 0x9abe14cd44753b52,0xc4926a9672793543, + 0xc16d9a0095928a27,0x75b7053c0f178294, + 0xf1c90080baf72cb1,0x5324c68b12dd6339, + 0x971da05074da7bee,0xd3f6fc16ebca5e04, + 0xbce5086492111aea,0x88f4bb1ca6bcf585, + 0xec1e4a7db69561a5,0x2b31e9e3d06c32e6, + 
0x9392ee8e921d5d07,0x3aff322e62439fd0, + 0xb877aa3236a4b449,0x9befeb9fad487c3, + 0xe69594bec44de15b,0x4c2ebe687989a9b4, + 0x901d7cf73ab0acd9,0xf9d37014bf60a11, + 0xb424dc35095cd80f,0x538484c19ef38c95, + 0xe12e13424bb40e13,0x2865a5f206b06fba, + 0x8cbccc096f5088cb,0xf93f87b7442e45d4, + 0xafebff0bcb24aafe,0xf78f69a51539d749, + 0xdbe6fecebdedd5be,0xb573440e5a884d1c, + 0x89705f4136b4a597,0x31680a88f8953031, + 0xabcc77118461cefc,0xfdc20d2b36ba7c3e, + 0xd6bf94d5e57a42bc,0x3d32907604691b4d, + 0x8637bd05af6c69b5,0xa63f9a49c2c1b110, + 0xa7c5ac471b478423,0xfcf80dc33721d54, + 0xd1b71758e219652b,0xd3c36113404ea4a9, + 0x83126e978d4fdf3b,0x645a1cac083126ea, + 0xa3d70a3d70a3d70a,0x3d70a3d70a3d70a4, + 0xcccccccccccccccc,0xcccccccccccccccd, + 0x8000000000000000,0x0, + 0xa000000000000000,0x0, + 0xc800000000000000,0x0, + 0xfa00000000000000,0x0, + 0x9c40000000000000,0x0, + 0xc350000000000000,0x0, + 0xf424000000000000,0x0, + 0x9896800000000000,0x0, + 0xbebc200000000000,0x0, + 0xee6b280000000000,0x0, + 0x9502f90000000000,0x0, + 0xba43b74000000000,0x0, + 0xe8d4a51000000000,0x0, + 0x9184e72a00000000,0x0, + 0xb5e620f480000000,0x0, + 0xe35fa931a0000000,0x0, + 0x8e1bc9bf04000000,0x0, + 0xb1a2bc2ec5000000,0x0, + 0xde0b6b3a76400000,0x0, + 0x8ac7230489e80000,0x0, + 0xad78ebc5ac620000,0x0, + 0xd8d726b7177a8000,0x0, + 0x878678326eac9000,0x0, + 0xa968163f0a57b400,0x0, + 0xd3c21bcecceda100,0x0, + 0x84595161401484a0,0x0, + 0xa56fa5b99019a5c8,0x0, + 0xcecb8f27f4200f3a,0x0, + 0x813f3978f8940984,0x4000000000000000, + 0xa18f07d736b90be5,0x5000000000000000, + 0xc9f2c9cd04674ede,0xa400000000000000, + 0xfc6f7c4045812296,0x4d00000000000000, + 0x9dc5ada82b70b59d,0xf020000000000000, + 0xc5371912364ce305,0x6c28000000000000, + 0xf684df56c3e01bc6,0xc732000000000000, + 0x9a130b963a6c115c,0x3c7f400000000000, + 0xc097ce7bc90715b3,0x4b9f100000000000, + 0xf0bdc21abb48db20,0x1e86d40000000000, + 0x96769950b50d88f4,0x1314448000000000, + 0xbc143fa4e250eb31,0x17d955a000000000, + 0xeb194f8e1ae525fd,0x5dcfab0800000000, + 
0x92efd1b8d0cf37be,0x5aa1cae500000000, + 0xb7abc627050305ad,0xf14a3d9e40000000, + 0xe596b7b0c643c719,0x6d9ccd05d0000000, + 0x8f7e32ce7bea5c6f,0xe4820023a2000000, + 0xb35dbf821ae4f38b,0xdda2802c8a800000, + 0xe0352f62a19e306e,0xd50b2037ad200000, + 0x8c213d9da502de45,0x4526f422cc340000, + 0xaf298d050e4395d6,0x9670b12b7f410000, + 0xdaf3f04651d47b4c,0x3c0cdd765f114000, + 0x88d8762bf324cd0f,0xa5880a69fb6ac800, + 0xab0e93b6efee0053,0x8eea0d047a457a00, + 0xd5d238a4abe98068,0x72a4904598d6d880, + 0x85a36366eb71f041,0x47a6da2b7f864750, + 0xa70c3c40a64e6c51,0x999090b65f67d924, + 0xd0cf4b50cfe20765,0xfff4b4e3f741cf6d, + 0x82818f1281ed449f,0xbff8f10e7a8921a4, + 0xa321f2d7226895c7,0xaff72d52192b6a0d, + 0xcbea6f8ceb02bb39,0x9bf4f8a69f764490, + 0xfee50b7025c36a08,0x2f236d04753d5b4, + 0x9f4f2726179a2245,0x1d762422c946590, + 0xc722f0ef9d80aad6,0x424d3ad2b7b97ef5, + 0xf8ebad2b84e0d58b,0xd2e0898765a7deb2, + 0x9b934c3b330c8577,0x63cc55f49f88eb2f, + 0xc2781f49ffcfa6d5,0x3cbf6b71c76b25fb, + 0xf316271c7fc3908a,0x8bef464e3945ef7a, + 0x97edd871cfda3a56,0x97758bf0e3cbb5ac, + 0xbde94e8e43d0c8ec,0x3d52eeed1cbea317, + 0xed63a231d4c4fb27,0x4ca7aaa863ee4bdd, + 0x945e455f24fb1cf8,0x8fe8caa93e74ef6a, + 0xb975d6b6ee39e436,0xb3e2fd538e122b44, + 0xe7d34c64a9c85d44,0x60dbbca87196b616, + 0x90e40fbeea1d3a4a,0xbc8955e946fe31cd, + 0xb51d13aea4a488dd,0x6babab6398bdbe41, + 0xe264589a4dcdab14,0xc696963c7eed2dd1, + 0x8d7eb76070a08aec,0xfc1e1de5cf543ca2, + 0xb0de65388cc8ada8,0x3b25a55f43294bcb, + 0xdd15fe86affad912,0x49ef0eb713f39ebe, + 0x8a2dbf142dfcc7ab,0x6e3569326c784337, + 0xacb92ed9397bf996,0x49c2c37f07965404, + 0xd7e77a8f87daf7fb,0xdc33745ec97be906, + 0x86f0ac99b4e8dafd,0x69a028bb3ded71a3, + 0xa8acd7c0222311bc,0xc40832ea0d68ce0c, + 0xd2d80db02aabd62b,0xf50a3fa490c30190, + 0x83c7088e1aab65db,0x792667c6da79e0fa, + 0xa4b8cab1a1563f52,0x577001b891185938, + 0xcde6fd5e09abcf26,0xed4c0226b55e6f86, + 0x80b05e5ac60b6178,0x544f8158315b05b4, + 0xa0dc75f1778e39d6,0x696361ae3db1c721, + 
0xc913936dd571c84c,0x3bc3a19cd1e38e9, + 0xfb5878494ace3a5f,0x4ab48a04065c723, + 0x9d174b2dcec0e47b,0x62eb0d64283f9c76, + 0xc45d1df942711d9a,0x3ba5d0bd324f8394, + 0xf5746577930d6500,0xca8f44ec7ee36479, + 0x9968bf6abbe85f20,0x7e998b13cf4e1ecb, + 0xbfc2ef456ae276e8,0x9e3fedd8c321a67e, + 0xefb3ab16c59b14a2,0xc5cfe94ef3ea101e, + 0x95d04aee3b80ece5,0xbba1f1d158724a12, + 0xbb445da9ca61281f,0x2a8a6e45ae8edc97, + 0xea1575143cf97226,0xf52d09d71a3293bd, + 0x924d692ca61be758,0x593c2626705f9c56, + 0xb6e0c377cfa2e12e,0x6f8b2fb00c77836c, + 0xe498f455c38b997a,0xb6dfb9c0f956447, + 0x8edf98b59a373fec,0x4724bd4189bd5eac, + 0xb2977ee300c50fe7,0x58edec91ec2cb657, + 0xdf3d5e9bc0f653e1,0x2f2967b66737e3ed, + 0x8b865b215899f46c,0xbd79e0d20082ee74, + 0xae67f1e9aec07187,0xecd8590680a3aa11, + 0xda01ee641a708de9,0xe80e6f4820cc9495, + 0x884134fe908658b2,0x3109058d147fdcdd, + 0xaa51823e34a7eede,0xbd4b46f0599fd415, + 0xd4e5e2cdc1d1ea96,0x6c9e18ac7007c91a, + 0x850fadc09923329e,0x3e2cf6bc604ddb0, + 0xa6539930bf6bff45,0x84db8346b786151c, + 0xcfe87f7cef46ff16,0xe612641865679a63, + 0x81f14fae158c5f6e,0x4fcb7e8f3f60c07e, + 0xa26da3999aef7749,0xe3be5e330f38f09d, + 0xcb090c8001ab551c,0x5cadf5bfd3072cc5, + 0xfdcb4fa002162a63,0x73d9732fc7c8f7f6, + 0x9e9f11c4014dda7e,0x2867e7fddcdd9afa, + 0xc646d63501a1511d,0xb281e1fd541501b8, + 0xf7d88bc24209a565,0x1f225a7ca91a4226, + 0x9ae757596946075f,0x3375788de9b06958, + 0xc1a12d2fc3978937,0x52d6b1641c83ae, + 0xf209787bb47d6b84,0xc0678c5dbd23a49a, + 0x9745eb4d50ce6332,0xf840b7ba963646e0, + 0xbd176620a501fbff,0xb650e5a93bc3d898, + 0xec5d3fa8ce427aff,0xa3e51f138ab4cebe, + 0x93ba47c980e98cdf,0xc66f336c36b10137, + 0xb8a8d9bbe123f017,0xb80b0047445d4184, + 0xe6d3102ad96cec1d,0xa60dc059157491e5, + 0x9043ea1ac7e41392,0x87c89837ad68db2f, + 0xb454e4a179dd1877,0x29babe4598c311fb, + 0xe16a1dc9d8545e94,0xf4296dd6fef3d67a, + 0x8ce2529e2734bb1d,0x1899e4a65f58660c, + 0xb01ae745b101e9e4,0x5ec05dcff72e7f8f, + 0xdc21a1171d42645d,0x76707543f4fa1f73, + 
0x899504ae72497eba,0x6a06494a791c53a8, + 0xabfa45da0edbde69,0x487db9d17636892, + 0xd6f8d7509292d603,0x45a9d2845d3c42b6, + 0x865b86925b9bc5c2,0xb8a2392ba45a9b2, + 0xa7f26836f282b732,0x8e6cac7768d7141e, + 0xd1ef0244af2364ff,0x3207d795430cd926, + 0x8335616aed761f1f,0x7f44e6bd49e807b8, + 0xa402b9c5a8d3a6e7,0x5f16206c9c6209a6, + 0xcd036837130890a1,0x36dba887c37a8c0f, + 0x802221226be55a64,0xc2494954da2c9789, + 0xa02aa96b06deb0fd,0xf2db9baa10b7bd6c, + 0xc83553c5c8965d3d,0x6f92829494e5acc7, + 0xfa42a8b73abbf48c,0xcb772339ba1f17f9, + 0x9c69a97284b578d7,0xff2a760414536efb, + 0xc38413cf25e2d70d,0xfef5138519684aba, + 0xf46518c2ef5b8cd1,0x7eb258665fc25d69, + 0x98bf2f79d5993802,0xef2f773ffbd97a61, + 0xbeeefb584aff8603,0xaafb550ffacfd8fa, + 0xeeaaba2e5dbf6784,0x95ba2a53f983cf38, + 0x952ab45cfa97a0b2,0xdd945a747bf26183, + 0xba756174393d88df,0x94f971119aeef9e4, + 0xe912b9d1478ceb17,0x7a37cd5601aab85d, + 0x91abb422ccb812ee,0xac62e055c10ab33a, + 0xb616a12b7fe617aa,0x577b986b314d6009, + 0xe39c49765fdf9d94,0xed5a7e85fda0b80b, + 0x8e41ade9fbebc27d,0x14588f13be847307, + 0xb1d219647ae6b31c,0x596eb2d8ae258fc8, + 0xde469fbd99a05fe3,0x6fca5f8ed9aef3bb, + 0x8aec23d680043bee,0x25de7bb9480d5854, + 0xada72ccc20054ae9,0xaf561aa79a10ae6a, + 0xd910f7ff28069da4,0x1b2ba1518094da04, + 0x87aa9aff79042286,0x90fb44d2f05d0842, + 0xa99541bf57452b28,0x353a1607ac744a53, + 0xd3fa922f2d1675f2,0x42889b8997915ce8, + 0x847c9b5d7c2e09b7,0x69956135febada11, + 0xa59bc234db398c25,0x43fab9837e699095, + 0xcf02b2c21207ef2e,0x94f967e45e03f4bb, + 0x8161afb94b44f57d,0x1d1be0eebac278f5, + 0xa1ba1ba79e1632dc,0x6462d92a69731732, + 0xca28a291859bbf93,0x7d7b8f7503cfdcfe, + 0xfcb2cb35e702af78,0x5cda735244c3d43e, + 0x9defbf01b061adab,0x3a0888136afa64a7, + 0xc56baec21c7a1916,0x88aaa1845b8fdd0, + 0xf6c69a72a3989f5b,0x8aad549e57273d45, + 0x9a3c2087a63f6399,0x36ac54e2f678864b, + 0xc0cb28a98fcf3c7f,0x84576a1bb416a7dd, + 0xf0fdf2d3f3c30b9f,0x656d44a2a11c51d5, + 0x969eb7c47859e743,0x9f644ae5a4b1b325, + 
0xbc4665b596706114,0x873d5d9f0dde1fee, + 0xeb57ff22fc0c7959,0xa90cb506d155a7ea, + 0x9316ff75dd87cbd8,0x9a7f12442d588f2, + 0xb7dcbf5354e9bece,0xc11ed6d538aeb2f, + 0xe5d3ef282a242e81,0x8f1668c8a86da5fa, + 0x8fa475791a569d10,0xf96e017d694487bc, + 0xb38d92d760ec4455,0x37c981dcc395a9ac, + 0xe070f78d3927556a,0x85bbe253f47b1417, + 0x8c469ab843b89562,0x93956d7478ccec8e, + 0xaf58416654a6babb,0x387ac8d1970027b2, + 0xdb2e51bfe9d0696a,0x6997b05fcc0319e, + 0x88fcf317f22241e2,0x441fece3bdf81f03, + 0xab3c2fddeeaad25a,0xd527e81cad7626c3, + 0xd60b3bd56a5586f1,0x8a71e223d8d3b074, + 0x85c7056562757456,0xf6872d5667844e49, + 0xa738c6bebb12d16c,0xb428f8ac016561db, + 0xd106f86e69d785c7,0xe13336d701beba52, + 0x82a45b450226b39c,0xecc0024661173473, + 0xa34d721642b06084,0x27f002d7f95d0190, + 0xcc20ce9bd35c78a5,0x31ec038df7b441f4, + 0xff290242c83396ce,0x7e67047175a15271, + 0x9f79a169bd203e41,0xf0062c6e984d386, + 0xc75809c42c684dd1,0x52c07b78a3e60868, + 0xf92e0c3537826145,0xa7709a56ccdf8a82, + 0x9bbcc7a142b17ccb,0x88a66076400bb691, + 0xc2abf989935ddbfe,0x6acff893d00ea435, + 0xf356f7ebf83552fe,0x583f6b8c4124d43, + 0x98165af37b2153de,0xc3727a337a8b704a, + 0xbe1bf1b059e9a8d6,0x744f18c0592e4c5c, + 0xeda2ee1c7064130c,0x1162def06f79df73, + 0x9485d4d1c63e8be7,0x8addcb5645ac2ba8, + 0xb9a74a0637ce2ee1,0x6d953e2bd7173692, + 0xe8111c87c5c1ba99,0xc8fa8db6ccdd0437, + 0x910ab1d4db9914a0,0x1d9c9892400a22a2, + 0xb54d5e4a127f59c8,0x2503beb6d00cab4b, + 0xe2a0b5dc971f303a,0x2e44ae64840fd61d, + 0x8da471a9de737e24,0x5ceaecfed289e5d2, + 0xb10d8e1456105dad,0x7425a83e872c5f47, + 0xdd50f1996b947518,0xd12f124e28f77719, + 0x8a5296ffe33cc92f,0x82bd6b70d99aaa6f, + 0xace73cbfdc0bfb7b,0x636cc64d1001550b, + 0xd8210befd30efa5a,0x3c47f7e05401aa4e, + 0x8714a775e3e95c78,0x65acfaec34810a71, + 0xa8d9d1535ce3b396,0x7f1839a741a14d0d, + 0xd31045a8341ca07c,0x1ede48111209a050, + 0x83ea2b892091e44d,0x934aed0aab460432, + 0xa4e4b66b68b65d60,0xf81da84d5617853f, + 0xce1de40642e3f4b9,0x36251260ab9d668e, + 
0x80d2ae83e9ce78f3,0xc1d72b7c6b426019, + 0xa1075a24e4421730,0xb24cf65b8612f81f, + 0xc94930ae1d529cfc,0xdee033f26797b627, + 0xfb9b7cd9a4a7443c,0x169840ef017da3b1, + 0x9d412e0806e88aa5,0x8e1f289560ee864e, + 0xc491798a08a2ad4e,0xf1a6f2bab92a27e2, + 0xf5b5d7ec8acb58a2,0xae10af696774b1db, + 0x9991a6f3d6bf1765,0xacca6da1e0a8ef29, + 0xbff610b0cc6edd3f,0x17fd090a58d32af3, + 0xeff394dcff8a948e,0xddfc4b4cef07f5b0, + 0x95f83d0a1fb69cd9,0x4abdaf101564f98e, + 0xbb764c4ca7a4440f,0x9d6d1ad41abe37f1, + 0xea53df5fd18d5513,0x84c86189216dc5ed, + 0x92746b9be2f8552c,0x32fd3cf5b4e49bb4, + 0xb7118682dbb66a77,0x3fbc8c33221dc2a1, + 0xe4d5e82392a40515,0xfabaf3feaa5334a, + 0x8f05b1163ba6832d,0x29cb4d87f2a7400e, + 0xb2c71d5bca9023f8,0x743e20e9ef511012, + 0xdf78e4b2bd342cf6,0x914da9246b255416, + 0x8bab8eefb6409c1a,0x1ad089b6c2f7548e, + 0xae9672aba3d0c320,0xa184ac2473b529b1, + 0xda3c0f568cc4f3e8,0xc9e5d72d90a2741e, + 0x8865899617fb1871,0x7e2fa67c7a658892, + 0xaa7eebfb9df9de8d,0xddbb901b98feeab7, + 0xd51ea6fa85785631,0x552a74227f3ea565, + 0x8533285c936b35de,0xd53a88958f87275f, + 0xa67ff273b8460356,0x8a892abaf368f137, + 0xd01fef10a657842c,0x2d2b7569b0432d85, + 0x8213f56a67f6b29b,0x9c3b29620e29fc73, + 0xa298f2c501f45f42,0x8349f3ba91b47b8f, + 0xcb3f2f7642717713,0x241c70a936219a73, + 0xfe0efb53d30dd4d7,0xed238cd383aa0110, + 0x9ec95d1463e8a506,0xf4363804324a40aa, + 0xc67bb4597ce2ce48,0xb143c6053edcd0d5, + 0xf81aa16fdc1b81da,0xdd94b7868e94050a, + 0x9b10a4e5e9913128,0xca7cf2b4191c8326, + 0xc1d4ce1f63f57d72,0xfd1c2f611f63a3f0, + 0xf24a01a73cf2dccf,0xbc633b39673c8cec, + 0x976e41088617ca01,0xd5be0503e085d813, + 0xbd49d14aa79dbc82,0x4b2d8644d8a74e18, + 0xec9c459d51852ba2,0xddf8e7d60ed1219e, + 0x93e1ab8252f33b45,0xcabb90e5c942b503, + 0xb8da1662e7b00a17,0x3d6a751f3b936243, + 0xe7109bfba19c0c9d,0xcc512670a783ad4, + 0x906a617d450187e2,0x27fb2b80668b24c5, + 0xb484f9dc9641e9da,0xb1f9f660802dedf6, + 0xe1a63853bbd26451,0x5e7873f8a0396973, + 0x8d07e33455637eb2,0xdb0b487b6423e1e8, + 
0xb049dc016abc5e5f,0x91ce1a9a3d2cda62, + 0xdc5c5301c56b75f7,0x7641a140cc7810fb, + 0x89b9b3e11b6329ba,0xa9e904c87fcb0a9d, + 0xac2820d9623bf429,0x546345fa9fbdcd44, + 0xd732290fbacaf133,0xa97c177947ad4095, + 0x867f59a9d4bed6c0,0x49ed8eabcccc485d, + 0xa81f301449ee8c70,0x5c68f256bfff5a74, + 0xd226fc195c6a2f8c,0x73832eec6fff3111, + 0x83585d8fd9c25db7,0xc831fd53c5ff7eab, + 0xa42e74f3d032f525,0xba3e7ca8b77f5e55, + 0xcd3a1230c43fb26f,0x28ce1bd2e55f35eb, + 0x80444b5e7aa7cf85,0x7980d163cf5b81b3, + 0xa0555e361951c366,0xd7e105bcc332621f, + 0xc86ab5c39fa63440,0x8dd9472bf3fefaa7, + 0xfa856334878fc150,0xb14f98f6f0feb951, + 0x9c935e00d4b9d8d2,0x6ed1bf9a569f33d3, + 0xc3b8358109e84f07,0xa862f80ec4700c8, + 0xf4a642e14c6262c8,0xcd27bb612758c0fa, + 0x98e7e9cccfbd7dbd,0x8038d51cb897789c, + 0xbf21e44003acdd2c,0xe0470a63e6bd56c3, + 0xeeea5d5004981478,0x1858ccfce06cac74, + 0x95527a5202df0ccb,0xf37801e0c43ebc8, + 0xbaa718e68396cffd,0xd30560258f54e6ba, + 0xe950df20247c83fd,0x47c6b82ef32a2069, + 0x91d28b7416cdd27e,0x4cdc331d57fa5441, + 0xb6472e511c81471d,0xe0133fe4adf8e952, + 0xe3d8f9e563a198e5,0x58180fddd97723a6, + 0x8e679c2f5e44ff8f,0x570f09eaa7ea7648,}; }; template -const uint64_t powers_template::power_of_five_128[number_of_entries] = { - 0xeef453d6923bd65a,0x113faa2906a13b3f, - 0x9558b4661b6565f8,0x4ac7ca59a424c507, - 0xbaaee17fa23ebf76,0x5d79bcf00d2df649, - 0xe95a99df8ace6f53,0xf4d82c2c107973dc, - 0x91d8a02bb6c10594,0x79071b9b8a4be869, - 0xb64ec836a47146f9,0x9748e2826cdee284, - 0xe3e27a444d8d98b7,0xfd1b1b2308169b25, - 0x8e6d8c6ab0787f72,0xfe30f0f5e50e20f7, - 0xb208ef855c969f4f,0xbdbd2d335e51a935, - 0xde8b2b66b3bc4723,0xad2c788035e61382, - 0x8b16fb203055ac76,0x4c3bcb5021afcc31, - 0xaddcb9e83c6b1793,0xdf4abe242a1bbf3d, - 0xd953e8624b85dd78,0xd71d6dad34a2af0d, - 0x87d4713d6f33aa6b,0x8672648c40e5ad68, - 0xa9c98d8ccb009506,0x680efdaf511f18c2, - 0xd43bf0effdc0ba48,0x212bd1b2566def2, - 0x84a57695fe98746d,0x14bb630f7604b57, - 0xa5ced43b7e3e9188,0x419ea3bd35385e2d, - 
0xcf42894a5dce35ea,0x52064cac828675b9, - 0x818995ce7aa0e1b2,0x7343efebd1940993, - 0xa1ebfb4219491a1f,0x1014ebe6c5f90bf8, - 0xca66fa129f9b60a6,0xd41a26e077774ef6, - 0xfd00b897478238d0,0x8920b098955522b4, - 0x9e20735e8cb16382,0x55b46e5f5d5535b0, - 0xc5a890362fddbc62,0xeb2189f734aa831d, - 0xf712b443bbd52b7b,0xa5e9ec7501d523e4, - 0x9a6bb0aa55653b2d,0x47b233c92125366e, - 0xc1069cd4eabe89f8,0x999ec0bb696e840a, - 0xf148440a256e2c76,0xc00670ea43ca250d, - 0x96cd2a865764dbca,0x380406926a5e5728, - 0xbc807527ed3e12bc,0xc605083704f5ecf2, - 0xeba09271e88d976b,0xf7864a44c633682e, - 0x93445b8731587ea3,0x7ab3ee6afbe0211d, - 0xb8157268fdae9e4c,0x5960ea05bad82964, - 0xe61acf033d1a45df,0x6fb92487298e33bd, - 0x8fd0c16206306bab,0xa5d3b6d479f8e056, - 0xb3c4f1ba87bc8696,0x8f48a4899877186c, - 0xe0b62e2929aba83c,0x331acdabfe94de87, - 0x8c71dcd9ba0b4925,0x9ff0c08b7f1d0b14, - 0xaf8e5410288e1b6f,0x7ecf0ae5ee44dd9, - 0xdb71e91432b1a24a,0xc9e82cd9f69d6150, - 0x892731ac9faf056e,0xbe311c083a225cd2, - 0xab70fe17c79ac6ca,0x6dbd630a48aaf406, - 0xd64d3d9db981787d,0x92cbbccdad5b108, - 0x85f0468293f0eb4e,0x25bbf56008c58ea5, - 0xa76c582338ed2621,0xaf2af2b80af6f24e, - 0xd1476e2c07286faa,0x1af5af660db4aee1, - 0x82cca4db847945ca,0x50d98d9fc890ed4d, - 0xa37fce126597973c,0xe50ff107bab528a0, - 0xcc5fc196fefd7d0c,0x1e53ed49a96272c8, - 0xff77b1fcbebcdc4f,0x25e8e89c13bb0f7a, - 0x9faacf3df73609b1,0x77b191618c54e9ac, - 0xc795830d75038c1d,0xd59df5b9ef6a2417, - 0xf97ae3d0d2446f25,0x4b0573286b44ad1d, - 0x9becce62836ac577,0x4ee367f9430aec32, - 0xc2e801fb244576d5,0x229c41f793cda73f, - 0xf3a20279ed56d48a,0x6b43527578c1110f, - 0x9845418c345644d6,0x830a13896b78aaa9, - 0xbe5691ef416bd60c,0x23cc986bc656d553, - 0xedec366b11c6cb8f,0x2cbfbe86b7ec8aa8, - 0x94b3a202eb1c3f39,0x7bf7d71432f3d6a9, - 0xb9e08a83a5e34f07,0xdaf5ccd93fb0cc53, - 0xe858ad248f5c22c9,0xd1b3400f8f9cff68, - 0x91376c36d99995be,0x23100809b9c21fa1, - 0xb58547448ffffb2d,0xabd40a0c2832a78a, - 0xe2e69915b3fff9f9,0x16c90c8f323f516c, - 
0x8dd01fad907ffc3b,0xae3da7d97f6792e3, - 0xb1442798f49ffb4a,0x99cd11cfdf41779c, - 0xdd95317f31c7fa1d,0x40405643d711d583, - 0x8a7d3eef7f1cfc52,0x482835ea666b2572, - 0xad1c8eab5ee43b66,0xda3243650005eecf, - 0xd863b256369d4a40,0x90bed43e40076a82, - 0x873e4f75e2224e68,0x5a7744a6e804a291, - 0xa90de3535aaae202,0x711515d0a205cb36, - 0xd3515c2831559a83,0xd5a5b44ca873e03, - 0x8412d9991ed58091,0xe858790afe9486c2, - 0xa5178fff668ae0b6,0x626e974dbe39a872, - 0xce5d73ff402d98e3,0xfb0a3d212dc8128f, - 0x80fa687f881c7f8e,0x7ce66634bc9d0b99, - 0xa139029f6a239f72,0x1c1fffc1ebc44e80, - 0xc987434744ac874e,0xa327ffb266b56220, - 0xfbe9141915d7a922,0x4bf1ff9f0062baa8, - 0x9d71ac8fada6c9b5,0x6f773fc3603db4a9, - 0xc4ce17b399107c22,0xcb550fb4384d21d3, - 0xf6019da07f549b2b,0x7e2a53a146606a48, - 0x99c102844f94e0fb,0x2eda7444cbfc426d, - 0xc0314325637a1939,0xfa911155fefb5308, - 0xf03d93eebc589f88,0x793555ab7eba27ca, - 0x96267c7535b763b5,0x4bc1558b2f3458de, - 0xbbb01b9283253ca2,0x9eb1aaedfb016f16, - 0xea9c227723ee8bcb,0x465e15a979c1cadc, - 0x92a1958a7675175f,0xbfacd89ec191ec9, - 0xb749faed14125d36,0xcef980ec671f667b, - 0xe51c79a85916f484,0x82b7e12780e7401a, - 0x8f31cc0937ae58d2,0xd1b2ecb8b0908810, - 0xb2fe3f0b8599ef07,0x861fa7e6dcb4aa15, - 0xdfbdcece67006ac9,0x67a791e093e1d49a, - 0x8bd6a141006042bd,0xe0c8bb2c5c6d24e0, - 0xaecc49914078536d,0x58fae9f773886e18, - 0xda7f5bf590966848,0xaf39a475506a899e, - 0x888f99797a5e012d,0x6d8406c952429603, - 0xaab37fd7d8f58178,0xc8e5087ba6d33b83, - 0xd5605fcdcf32e1d6,0xfb1e4a9a90880a64, - 0x855c3be0a17fcd26,0x5cf2eea09a55067f, - 0xa6b34ad8c9dfc06f,0xf42faa48c0ea481e, - 0xd0601d8efc57b08b,0xf13b94daf124da26, - 0x823c12795db6ce57,0x76c53d08d6b70858, - 0xa2cb1717b52481ed,0x54768c4b0c64ca6e, - 0xcb7ddcdda26da268,0xa9942f5dcf7dfd09, - 0xfe5d54150b090b02,0xd3f93b35435d7c4c, - 0x9efa548d26e5a6e1,0xc47bc5014a1a6daf, - 0xc6b8e9b0709f109a,0x359ab6419ca1091b, - 0xf867241c8cc6d4c0,0xc30163d203c94b62, - 0x9b407691d7fc44f8,0x79e0de63425dcf1d, - 
0xc21094364dfb5636,0x985915fc12f542e4, - 0xf294b943e17a2bc4,0x3e6f5b7b17b2939d, - 0x979cf3ca6cec5b5a,0xa705992ceecf9c42, - 0xbd8430bd08277231,0x50c6ff782a838353, - 0xece53cec4a314ebd,0xa4f8bf5635246428, - 0x940f4613ae5ed136,0x871b7795e136be99, - 0xb913179899f68584,0x28e2557b59846e3f, - 0xe757dd7ec07426e5,0x331aeada2fe589cf, - 0x9096ea6f3848984f,0x3ff0d2c85def7621, - 0xb4bca50b065abe63,0xfed077a756b53a9, - 0xe1ebce4dc7f16dfb,0xd3e8495912c62894, - 0x8d3360f09cf6e4bd,0x64712dd7abbbd95c, - 0xb080392cc4349dec,0xbd8d794d96aacfb3, - 0xdca04777f541c567,0xecf0d7a0fc5583a0, - 0x89e42caaf9491b60,0xf41686c49db57244, - 0xac5d37d5b79b6239,0x311c2875c522ced5, - 0xd77485cb25823ac7,0x7d633293366b828b, - 0x86a8d39ef77164bc,0xae5dff9c02033197, - 0xa8530886b54dbdeb,0xd9f57f830283fdfc, - 0xd267caa862a12d66,0xd072df63c324fd7b, - 0x8380dea93da4bc60,0x4247cb9e59f71e6d, - 0xa46116538d0deb78,0x52d9be85f074e608, - 0xcd795be870516656,0x67902e276c921f8b, - 0x806bd9714632dff6,0xba1cd8a3db53b6, - 0xa086cfcd97bf97f3,0x80e8a40eccd228a4, - 0xc8a883c0fdaf7df0,0x6122cd128006b2cd, - 0xfad2a4b13d1b5d6c,0x796b805720085f81, - 0x9cc3a6eec6311a63,0xcbe3303674053bb0, - 0xc3f490aa77bd60fc,0xbedbfc4411068a9c, - 0xf4f1b4d515acb93b,0xee92fb5515482d44, - 0x991711052d8bf3c5,0x751bdd152d4d1c4a, - 0xbf5cd54678eef0b6,0xd262d45a78a0635d, - 0xef340a98172aace4,0x86fb897116c87c34, - 0x9580869f0e7aac0e,0xd45d35e6ae3d4da0, - 0xbae0a846d2195712,0x8974836059cca109, - 0xe998d258869facd7,0x2bd1a438703fc94b, - 0x91ff83775423cc06,0x7b6306a34627ddcf, - 0xb67f6455292cbf08,0x1a3bc84c17b1d542, - 0xe41f3d6a7377eeca,0x20caba5f1d9e4a93, - 0x8e938662882af53e,0x547eb47b7282ee9c, - 0xb23867fb2a35b28d,0xe99e619a4f23aa43, - 0xdec681f9f4c31f31,0x6405fa00e2ec94d4, - 0x8b3c113c38f9f37e,0xde83bc408dd3dd04, - 0xae0b158b4738705e,0x9624ab50b148d445, - 0xd98ddaee19068c76,0x3badd624dd9b0957, - 0x87f8a8d4cfa417c9,0xe54ca5d70a80e5d6, - 0xa9f6d30a038d1dbc,0x5e9fcf4ccd211f4c, - 0xd47487cc8470652b,0x7647c3200069671f, - 
0x84c8d4dfd2c63f3b,0x29ecd9f40041e073, - 0xa5fb0a17c777cf09,0xf468107100525890, - 0xcf79cc9db955c2cc,0x7182148d4066eeb4, - 0x81ac1fe293d599bf,0xc6f14cd848405530, - 0xa21727db38cb002f,0xb8ada00e5a506a7c, - 0xca9cf1d206fdc03b,0xa6d90811f0e4851c, - 0xfd442e4688bd304a,0x908f4a166d1da663, - 0x9e4a9cec15763e2e,0x9a598e4e043287fe, - 0xc5dd44271ad3cdba,0x40eff1e1853f29fd, - 0xf7549530e188c128,0xd12bee59e68ef47c, - 0x9a94dd3e8cf578b9,0x82bb74f8301958ce, - 0xc13a148e3032d6e7,0xe36a52363c1faf01, - 0xf18899b1bc3f8ca1,0xdc44e6c3cb279ac1, - 0x96f5600f15a7b7e5,0x29ab103a5ef8c0b9, - 0xbcb2b812db11a5de,0x7415d448f6b6f0e7, - 0xebdf661791d60f56,0x111b495b3464ad21, - 0x936b9fcebb25c995,0xcab10dd900beec34, - 0xb84687c269ef3bfb,0x3d5d514f40eea742, - 0xe65829b3046b0afa,0xcb4a5a3112a5112, - 0x8ff71a0fe2c2e6dc,0x47f0e785eaba72ab, - 0xb3f4e093db73a093,0x59ed216765690f56, - 0xe0f218b8d25088b8,0x306869c13ec3532c, - 0x8c974f7383725573,0x1e414218c73a13fb, - 0xafbd2350644eeacf,0xe5d1929ef90898fa, - 0xdbac6c247d62a583,0xdf45f746b74abf39, - 0x894bc396ce5da772,0x6b8bba8c328eb783, - 0xab9eb47c81f5114f,0x66ea92f3f326564, - 0xd686619ba27255a2,0xc80a537b0efefebd, - 0x8613fd0145877585,0xbd06742ce95f5f36, - 0xa798fc4196e952e7,0x2c48113823b73704, - 0xd17f3b51fca3a7a0,0xf75a15862ca504c5, - 0x82ef85133de648c4,0x9a984d73dbe722fb, - 0xa3ab66580d5fdaf5,0xc13e60d0d2e0ebba, - 0xcc963fee10b7d1b3,0x318df905079926a8, - 0xffbbcfe994e5c61f,0xfdf17746497f7052, - 0x9fd561f1fd0f9bd3,0xfeb6ea8bedefa633, - 0xc7caba6e7c5382c8,0xfe64a52ee96b8fc0, - 0xf9bd690a1b68637b,0x3dfdce7aa3c673b0, - 0x9c1661a651213e2d,0x6bea10ca65c084e, - 0xc31bfa0fe5698db8,0x486e494fcff30a62, - 0xf3e2f893dec3f126,0x5a89dba3c3efccfa, - 0x986ddb5c6b3a76b7,0xf89629465a75e01c, - 0xbe89523386091465,0xf6bbb397f1135823, - 0xee2ba6c0678b597f,0x746aa07ded582e2c, - 0x94db483840b717ef,0xa8c2a44eb4571cdc, - 0xba121a4650e4ddeb,0x92f34d62616ce413, - 0xe896a0d7e51e1566,0x77b020baf9c81d17, - 0x915e2486ef32cd60,0xace1474dc1d122e, - 
0xb5b5ada8aaff80b8,0xd819992132456ba, - 0xe3231912d5bf60e6,0x10e1fff697ed6c69, - 0x8df5efabc5979c8f,0xca8d3ffa1ef463c1, - 0xb1736b96b6fd83b3,0xbd308ff8a6b17cb2, - 0xddd0467c64bce4a0,0xac7cb3f6d05ddbde, - 0x8aa22c0dbef60ee4,0x6bcdf07a423aa96b, - 0xad4ab7112eb3929d,0x86c16c98d2c953c6, - 0xd89d64d57a607744,0xe871c7bf077ba8b7, - 0x87625f056c7c4a8b,0x11471cd764ad4972, - 0xa93af6c6c79b5d2d,0xd598e40d3dd89bcf, - 0xd389b47879823479,0x4aff1d108d4ec2c3, - 0x843610cb4bf160cb,0xcedf722a585139ba, - 0xa54394fe1eedb8fe,0xc2974eb4ee658828, - 0xce947a3da6a9273e,0x733d226229feea32, - 0x811ccc668829b887,0x806357d5a3f525f, - 0xa163ff802a3426a8,0xca07c2dcb0cf26f7, - 0xc9bcff6034c13052,0xfc89b393dd02f0b5, - 0xfc2c3f3841f17c67,0xbbac2078d443ace2, - 0x9d9ba7832936edc0,0xd54b944b84aa4c0d, - 0xc5029163f384a931,0xa9e795e65d4df11, - 0xf64335bcf065d37d,0x4d4617b5ff4a16d5, - 0x99ea0196163fa42e,0x504bced1bf8e4e45, - 0xc06481fb9bcf8d39,0xe45ec2862f71e1d6, - 0xf07da27a82c37088,0x5d767327bb4e5a4c, - 0x964e858c91ba2655,0x3a6a07f8d510f86f, - 0xbbe226efb628afea,0x890489f70a55368b, - 0xeadab0aba3b2dbe5,0x2b45ac74ccea842e, - 0x92c8ae6b464fc96f,0x3b0b8bc90012929d, - 0xb77ada0617e3bbcb,0x9ce6ebb40173744, - 0xe55990879ddcaabd,0xcc420a6a101d0515, - 0x8f57fa54c2a9eab6,0x9fa946824a12232d, - 0xb32df8e9f3546564,0x47939822dc96abf9, - 0xdff9772470297ebd,0x59787e2b93bc56f7, - 0x8bfbea76c619ef36,0x57eb4edb3c55b65a, - 0xaefae51477a06b03,0xede622920b6b23f1, - 0xdab99e59958885c4,0xe95fab368e45eced, - 0x88b402f7fd75539b,0x11dbcb0218ebb414, - 0xaae103b5fcd2a881,0xd652bdc29f26a119, - 0xd59944a37c0752a2,0x4be76d3346f0495f, - 0x857fcae62d8493a5,0x6f70a4400c562ddb, - 0xa6dfbd9fb8e5b88e,0xcb4ccd500f6bb952, - 0xd097ad07a71f26b2,0x7e2000a41346a7a7, - 0x825ecc24c873782f,0x8ed400668c0c28c8, - 0xa2f67f2dfa90563b,0x728900802f0f32fa, - 0xcbb41ef979346bca,0x4f2b40a03ad2ffb9, - 0xfea126b7d78186bc,0xe2f610c84987bfa8, - 0x9f24b832e6b0f436,0xdd9ca7d2df4d7c9, - 0xc6ede63fa05d3143,0x91503d1c79720dbb, - 
0xf8a95fcf88747d94,0x75a44c6397ce912a, - 0x9b69dbe1b548ce7c,0xc986afbe3ee11aba, - 0xc24452da229b021b,0xfbe85badce996168, - 0xf2d56790ab41c2a2,0xfae27299423fb9c3, - 0x97c560ba6b0919a5,0xdccd879fc967d41a, - 0xbdb6b8e905cb600f,0x5400e987bbc1c920, - 0xed246723473e3813,0x290123e9aab23b68, - 0x9436c0760c86e30b,0xf9a0b6720aaf6521, - 0xb94470938fa89bce,0xf808e40e8d5b3e69, - 0xe7958cb87392c2c2,0xb60b1d1230b20e04, - 0x90bd77f3483bb9b9,0xb1c6f22b5e6f48c2, - 0xb4ecd5f01a4aa828,0x1e38aeb6360b1af3, - 0xe2280b6c20dd5232,0x25c6da63c38de1b0, - 0x8d590723948a535f,0x579c487e5a38ad0e, - 0xb0af48ec79ace837,0x2d835a9df0c6d851, - 0xdcdb1b2798182244,0xf8e431456cf88e65, - 0x8a08f0f8bf0f156b,0x1b8e9ecb641b58ff, - 0xac8b2d36eed2dac5,0xe272467e3d222f3f, - 0xd7adf884aa879177,0x5b0ed81dcc6abb0f, - 0x86ccbb52ea94baea,0x98e947129fc2b4e9, - 0xa87fea27a539e9a5,0x3f2398d747b36224, - 0xd29fe4b18e88640e,0x8eec7f0d19a03aad, - 0x83a3eeeef9153e89,0x1953cf68300424ac, - 0xa48ceaaab75a8e2b,0x5fa8c3423c052dd7, - 0xcdb02555653131b6,0x3792f412cb06794d, - 0x808e17555f3ebf11,0xe2bbd88bbee40bd0, - 0xa0b19d2ab70e6ed6,0x5b6aceaeae9d0ec4, - 0xc8de047564d20a8b,0xf245825a5a445275, - 0xfb158592be068d2e,0xeed6e2f0f0d56712, - 0x9ced737bb6c4183d,0x55464dd69685606b, - 0xc428d05aa4751e4c,0xaa97e14c3c26b886, - 0xf53304714d9265df,0xd53dd99f4b3066a8, - 0x993fe2c6d07b7fab,0xe546a8038efe4029, - 0xbf8fdb78849a5f96,0xde98520472bdd033, - 0xef73d256a5c0f77c,0x963e66858f6d4440, - 0x95a8637627989aad,0xdde7001379a44aa8, - 0xbb127c53b17ec159,0x5560c018580d5d52, - 0xe9d71b689dde71af,0xaab8f01e6e10b4a6, - 0x9226712162ab070d,0xcab3961304ca70e8, - 0xb6b00d69bb55c8d1,0x3d607b97c5fd0d22, - 0xe45c10c42a2b3b05,0x8cb89a7db77c506a, - 0x8eb98a7a9a5b04e3,0x77f3608e92adb242, - 0xb267ed1940f1c61c,0x55f038b237591ed3, - 0xdf01e85f912e37a3,0x6b6c46dec52f6688, - 0x8b61313bbabce2c6,0x2323ac4b3b3da015, - 0xae397d8aa96c1b77,0xabec975e0a0d081a, - 0xd9c7dced53c72255,0x96e7bd358c904a21, - 0x881cea14545c7575,0x7e50d64177da2e54, - 
0xaa242499697392d2,0xdde50bd1d5d0b9e9, - 0xd4ad2dbfc3d07787,0x955e4ec64b44e864, - 0x84ec3c97da624ab4,0xbd5af13bef0b113e, - 0xa6274bbdd0fadd61,0xecb1ad8aeacdd58e, - 0xcfb11ead453994ba,0x67de18eda5814af2, - 0x81ceb32c4b43fcf4,0x80eacf948770ced7, - 0xa2425ff75e14fc31,0xa1258379a94d028d, - 0xcad2f7f5359a3b3e,0x96ee45813a04330, - 0xfd87b5f28300ca0d,0x8bca9d6e188853fc, - 0x9e74d1b791e07e48,0x775ea264cf55347e, - 0xc612062576589dda,0x95364afe032a819e, - 0xf79687aed3eec551,0x3a83ddbd83f52205, - 0x9abe14cd44753b52,0xc4926a9672793543, - 0xc16d9a0095928a27,0x75b7053c0f178294, - 0xf1c90080baf72cb1,0x5324c68b12dd6339, - 0x971da05074da7bee,0xd3f6fc16ebca5e04, - 0xbce5086492111aea,0x88f4bb1ca6bcf585, - 0xec1e4a7db69561a5,0x2b31e9e3d06c32e6, - 0x9392ee8e921d5d07,0x3aff322e62439fd0, - 0xb877aa3236a4b449,0x9befeb9fad487c3, - 0xe69594bec44de15b,0x4c2ebe687989a9b4, - 0x901d7cf73ab0acd9,0xf9d37014bf60a11, - 0xb424dc35095cd80f,0x538484c19ef38c95, - 0xe12e13424bb40e13,0x2865a5f206b06fba, - 0x8cbccc096f5088cb,0xf93f87b7442e45d4, - 0xafebff0bcb24aafe,0xf78f69a51539d749, - 0xdbe6fecebdedd5be,0xb573440e5a884d1c, - 0x89705f4136b4a597,0x31680a88f8953031, - 0xabcc77118461cefc,0xfdc20d2b36ba7c3e, - 0xd6bf94d5e57a42bc,0x3d32907604691b4d, - 0x8637bd05af6c69b5,0xa63f9a49c2c1b110, - 0xa7c5ac471b478423,0xfcf80dc33721d54, - 0xd1b71758e219652b,0xd3c36113404ea4a9, - 0x83126e978d4fdf3b,0x645a1cac083126ea, - 0xa3d70a3d70a3d70a,0x3d70a3d70a3d70a4, - 0xcccccccccccccccc,0xcccccccccccccccd, - 0x8000000000000000,0x0, - 0xa000000000000000,0x0, - 0xc800000000000000,0x0, - 0xfa00000000000000,0x0, - 0x9c40000000000000,0x0, - 0xc350000000000000,0x0, - 0xf424000000000000,0x0, - 0x9896800000000000,0x0, - 0xbebc200000000000,0x0, - 0xee6b280000000000,0x0, - 0x9502f90000000000,0x0, - 0xba43b74000000000,0x0, - 0xe8d4a51000000000,0x0, - 0x9184e72a00000000,0x0, - 0xb5e620f480000000,0x0, - 0xe35fa931a0000000,0x0, - 0x8e1bc9bf04000000,0x0, - 0xb1a2bc2ec5000000,0x0, - 0xde0b6b3a76400000,0x0, - 0x8ac7230489e80000,0x0, - 
0xad78ebc5ac620000,0x0, - 0xd8d726b7177a8000,0x0, - 0x878678326eac9000,0x0, - 0xa968163f0a57b400,0x0, - 0xd3c21bcecceda100,0x0, - 0x84595161401484a0,0x0, - 0xa56fa5b99019a5c8,0x0, - 0xcecb8f27f4200f3a,0x0, - 0x813f3978f8940984,0x4000000000000000, - 0xa18f07d736b90be5,0x5000000000000000, - 0xc9f2c9cd04674ede,0xa400000000000000, - 0xfc6f7c4045812296,0x4d00000000000000, - 0x9dc5ada82b70b59d,0xf020000000000000, - 0xc5371912364ce305,0x6c28000000000000, - 0xf684df56c3e01bc6,0xc732000000000000, - 0x9a130b963a6c115c,0x3c7f400000000000, - 0xc097ce7bc90715b3,0x4b9f100000000000, - 0xf0bdc21abb48db20,0x1e86d40000000000, - 0x96769950b50d88f4,0x1314448000000000, - 0xbc143fa4e250eb31,0x17d955a000000000, - 0xeb194f8e1ae525fd,0x5dcfab0800000000, - 0x92efd1b8d0cf37be,0x5aa1cae500000000, - 0xb7abc627050305ad,0xf14a3d9e40000000, - 0xe596b7b0c643c719,0x6d9ccd05d0000000, - 0x8f7e32ce7bea5c6f,0xe4820023a2000000, - 0xb35dbf821ae4f38b,0xdda2802c8a800000, - 0xe0352f62a19e306e,0xd50b2037ad200000, - 0x8c213d9da502de45,0x4526f422cc340000, - 0xaf298d050e4395d6,0x9670b12b7f410000, - 0xdaf3f04651d47b4c,0x3c0cdd765f114000, - 0x88d8762bf324cd0f,0xa5880a69fb6ac800, - 0xab0e93b6efee0053,0x8eea0d047a457a00, - 0xd5d238a4abe98068,0x72a4904598d6d880, - 0x85a36366eb71f041,0x47a6da2b7f864750, - 0xa70c3c40a64e6c51,0x999090b65f67d924, - 0xd0cf4b50cfe20765,0xfff4b4e3f741cf6d, - 0x82818f1281ed449f,0xbff8f10e7a8921a4, - 0xa321f2d7226895c7,0xaff72d52192b6a0d, - 0xcbea6f8ceb02bb39,0x9bf4f8a69f764490, - 0xfee50b7025c36a08,0x2f236d04753d5b4, - 0x9f4f2726179a2245,0x1d762422c946590, - 0xc722f0ef9d80aad6,0x424d3ad2b7b97ef5, - 0xf8ebad2b84e0d58b,0xd2e0898765a7deb2, - 0x9b934c3b330c8577,0x63cc55f49f88eb2f, - 0xc2781f49ffcfa6d5,0x3cbf6b71c76b25fb, - 0xf316271c7fc3908a,0x8bef464e3945ef7a, - 0x97edd871cfda3a56,0x97758bf0e3cbb5ac, - 0xbde94e8e43d0c8ec,0x3d52eeed1cbea317, - 0xed63a231d4c4fb27,0x4ca7aaa863ee4bdd, - 0x945e455f24fb1cf8,0x8fe8caa93e74ef6a, - 0xb975d6b6ee39e436,0xb3e2fd538e122b44, - 
0xe7d34c64a9c85d44,0x60dbbca87196b616, - 0x90e40fbeea1d3a4a,0xbc8955e946fe31cd, - 0xb51d13aea4a488dd,0x6babab6398bdbe41, - 0xe264589a4dcdab14,0xc696963c7eed2dd1, - 0x8d7eb76070a08aec,0xfc1e1de5cf543ca2, - 0xb0de65388cc8ada8,0x3b25a55f43294bcb, - 0xdd15fe86affad912,0x49ef0eb713f39ebe, - 0x8a2dbf142dfcc7ab,0x6e3569326c784337, - 0xacb92ed9397bf996,0x49c2c37f07965404, - 0xd7e77a8f87daf7fb,0xdc33745ec97be906, - 0x86f0ac99b4e8dafd,0x69a028bb3ded71a3, - 0xa8acd7c0222311bc,0xc40832ea0d68ce0c, - 0xd2d80db02aabd62b,0xf50a3fa490c30190, - 0x83c7088e1aab65db,0x792667c6da79e0fa, - 0xa4b8cab1a1563f52,0x577001b891185938, - 0xcde6fd5e09abcf26,0xed4c0226b55e6f86, - 0x80b05e5ac60b6178,0x544f8158315b05b4, - 0xa0dc75f1778e39d6,0x696361ae3db1c721, - 0xc913936dd571c84c,0x3bc3a19cd1e38e9, - 0xfb5878494ace3a5f,0x4ab48a04065c723, - 0x9d174b2dcec0e47b,0x62eb0d64283f9c76, - 0xc45d1df942711d9a,0x3ba5d0bd324f8394, - 0xf5746577930d6500,0xca8f44ec7ee36479, - 0x9968bf6abbe85f20,0x7e998b13cf4e1ecb, - 0xbfc2ef456ae276e8,0x9e3fedd8c321a67e, - 0xefb3ab16c59b14a2,0xc5cfe94ef3ea101e, - 0x95d04aee3b80ece5,0xbba1f1d158724a12, - 0xbb445da9ca61281f,0x2a8a6e45ae8edc97, - 0xea1575143cf97226,0xf52d09d71a3293bd, - 0x924d692ca61be758,0x593c2626705f9c56, - 0xb6e0c377cfa2e12e,0x6f8b2fb00c77836c, - 0xe498f455c38b997a,0xb6dfb9c0f956447, - 0x8edf98b59a373fec,0x4724bd4189bd5eac, - 0xb2977ee300c50fe7,0x58edec91ec2cb657, - 0xdf3d5e9bc0f653e1,0x2f2967b66737e3ed, - 0x8b865b215899f46c,0xbd79e0d20082ee74, - 0xae67f1e9aec07187,0xecd8590680a3aa11, - 0xda01ee641a708de9,0xe80e6f4820cc9495, - 0x884134fe908658b2,0x3109058d147fdcdd, - 0xaa51823e34a7eede,0xbd4b46f0599fd415, - 0xd4e5e2cdc1d1ea96,0x6c9e18ac7007c91a, - 0x850fadc09923329e,0x3e2cf6bc604ddb0, - 0xa6539930bf6bff45,0x84db8346b786151c, - 0xcfe87f7cef46ff16,0xe612641865679a63, - 0x81f14fae158c5f6e,0x4fcb7e8f3f60c07e, - 0xa26da3999aef7749,0xe3be5e330f38f09d, - 0xcb090c8001ab551c,0x5cadf5bfd3072cc5, - 0xfdcb4fa002162a63,0x73d9732fc7c8f7f6, - 
0x9e9f11c4014dda7e,0x2867e7fddcdd9afa, - 0xc646d63501a1511d,0xb281e1fd541501b8, - 0xf7d88bc24209a565,0x1f225a7ca91a4226, - 0x9ae757596946075f,0x3375788de9b06958, - 0xc1a12d2fc3978937,0x52d6b1641c83ae, - 0xf209787bb47d6b84,0xc0678c5dbd23a49a, - 0x9745eb4d50ce6332,0xf840b7ba963646e0, - 0xbd176620a501fbff,0xb650e5a93bc3d898, - 0xec5d3fa8ce427aff,0xa3e51f138ab4cebe, - 0x93ba47c980e98cdf,0xc66f336c36b10137, - 0xb8a8d9bbe123f017,0xb80b0047445d4184, - 0xe6d3102ad96cec1d,0xa60dc059157491e5, - 0x9043ea1ac7e41392,0x87c89837ad68db2f, - 0xb454e4a179dd1877,0x29babe4598c311fb, - 0xe16a1dc9d8545e94,0xf4296dd6fef3d67a, - 0x8ce2529e2734bb1d,0x1899e4a65f58660c, - 0xb01ae745b101e9e4,0x5ec05dcff72e7f8f, - 0xdc21a1171d42645d,0x76707543f4fa1f73, - 0x899504ae72497eba,0x6a06494a791c53a8, - 0xabfa45da0edbde69,0x487db9d17636892, - 0xd6f8d7509292d603,0x45a9d2845d3c42b6, - 0x865b86925b9bc5c2,0xb8a2392ba45a9b2, - 0xa7f26836f282b732,0x8e6cac7768d7141e, - 0xd1ef0244af2364ff,0x3207d795430cd926, - 0x8335616aed761f1f,0x7f44e6bd49e807b8, - 0xa402b9c5a8d3a6e7,0x5f16206c9c6209a6, - 0xcd036837130890a1,0x36dba887c37a8c0f, - 0x802221226be55a64,0xc2494954da2c9789, - 0xa02aa96b06deb0fd,0xf2db9baa10b7bd6c, - 0xc83553c5c8965d3d,0x6f92829494e5acc7, - 0xfa42a8b73abbf48c,0xcb772339ba1f17f9, - 0x9c69a97284b578d7,0xff2a760414536efb, - 0xc38413cf25e2d70d,0xfef5138519684aba, - 0xf46518c2ef5b8cd1,0x7eb258665fc25d69, - 0x98bf2f79d5993802,0xef2f773ffbd97a61, - 0xbeeefb584aff8603,0xaafb550ffacfd8fa, - 0xeeaaba2e5dbf6784,0x95ba2a53f983cf38, - 0x952ab45cfa97a0b2,0xdd945a747bf26183, - 0xba756174393d88df,0x94f971119aeef9e4, - 0xe912b9d1478ceb17,0x7a37cd5601aab85d, - 0x91abb422ccb812ee,0xac62e055c10ab33a, - 0xb616a12b7fe617aa,0x577b986b314d6009, - 0xe39c49765fdf9d94,0xed5a7e85fda0b80b, - 0x8e41ade9fbebc27d,0x14588f13be847307, - 0xb1d219647ae6b31c,0x596eb2d8ae258fc8, - 0xde469fbd99a05fe3,0x6fca5f8ed9aef3bb, - 0x8aec23d680043bee,0x25de7bb9480d5854, - 0xada72ccc20054ae9,0xaf561aa79a10ae6a, - 
0xd910f7ff28069da4,0x1b2ba1518094da04, - 0x87aa9aff79042286,0x90fb44d2f05d0842, - 0xa99541bf57452b28,0x353a1607ac744a53, - 0xd3fa922f2d1675f2,0x42889b8997915ce8, - 0x847c9b5d7c2e09b7,0x69956135febada11, - 0xa59bc234db398c25,0x43fab9837e699095, - 0xcf02b2c21207ef2e,0x94f967e45e03f4bb, - 0x8161afb94b44f57d,0x1d1be0eebac278f5, - 0xa1ba1ba79e1632dc,0x6462d92a69731732, - 0xca28a291859bbf93,0x7d7b8f7503cfdcfe, - 0xfcb2cb35e702af78,0x5cda735244c3d43e, - 0x9defbf01b061adab,0x3a0888136afa64a7, - 0xc56baec21c7a1916,0x88aaa1845b8fdd0, - 0xf6c69a72a3989f5b,0x8aad549e57273d45, - 0x9a3c2087a63f6399,0x36ac54e2f678864b, - 0xc0cb28a98fcf3c7f,0x84576a1bb416a7dd, - 0xf0fdf2d3f3c30b9f,0x656d44a2a11c51d5, - 0x969eb7c47859e743,0x9f644ae5a4b1b325, - 0xbc4665b596706114,0x873d5d9f0dde1fee, - 0xeb57ff22fc0c7959,0xa90cb506d155a7ea, - 0x9316ff75dd87cbd8,0x9a7f12442d588f2, - 0xb7dcbf5354e9bece,0xc11ed6d538aeb2f, - 0xe5d3ef282a242e81,0x8f1668c8a86da5fa, - 0x8fa475791a569d10,0xf96e017d694487bc, - 0xb38d92d760ec4455,0x37c981dcc395a9ac, - 0xe070f78d3927556a,0x85bbe253f47b1417, - 0x8c469ab843b89562,0x93956d7478ccec8e, - 0xaf58416654a6babb,0x387ac8d1970027b2, - 0xdb2e51bfe9d0696a,0x6997b05fcc0319e, - 0x88fcf317f22241e2,0x441fece3bdf81f03, - 0xab3c2fddeeaad25a,0xd527e81cad7626c3, - 0xd60b3bd56a5586f1,0x8a71e223d8d3b074, - 0x85c7056562757456,0xf6872d5667844e49, - 0xa738c6bebb12d16c,0xb428f8ac016561db, - 0xd106f86e69d785c7,0xe13336d701beba52, - 0x82a45b450226b39c,0xecc0024661173473, - 0xa34d721642b06084,0x27f002d7f95d0190, - 0xcc20ce9bd35c78a5,0x31ec038df7b441f4, - 0xff290242c83396ce,0x7e67047175a15271, - 0x9f79a169bd203e41,0xf0062c6e984d386, - 0xc75809c42c684dd1,0x52c07b78a3e60868, - 0xf92e0c3537826145,0xa7709a56ccdf8a82, - 0x9bbcc7a142b17ccb,0x88a66076400bb691, - 0xc2abf989935ddbfe,0x6acff893d00ea435, - 0xf356f7ebf83552fe,0x583f6b8c4124d43, - 0x98165af37b2153de,0xc3727a337a8b704a, - 0xbe1bf1b059e9a8d6,0x744f18c0592e4c5c, - 0xeda2ee1c7064130c,0x1162def06f79df73, - 
0x9485d4d1c63e8be7,0x8addcb5645ac2ba8, - 0xb9a74a0637ce2ee1,0x6d953e2bd7173692, - 0xe8111c87c5c1ba99,0xc8fa8db6ccdd0437, - 0x910ab1d4db9914a0,0x1d9c9892400a22a2, - 0xb54d5e4a127f59c8,0x2503beb6d00cab4b, - 0xe2a0b5dc971f303a,0x2e44ae64840fd61d, - 0x8da471a9de737e24,0x5ceaecfed289e5d2, - 0xb10d8e1456105dad,0x7425a83e872c5f47, - 0xdd50f1996b947518,0xd12f124e28f77719, - 0x8a5296ffe33cc92f,0x82bd6b70d99aaa6f, - 0xace73cbfdc0bfb7b,0x636cc64d1001550b, - 0xd8210befd30efa5a,0x3c47f7e05401aa4e, - 0x8714a775e3e95c78,0x65acfaec34810a71, - 0xa8d9d1535ce3b396,0x7f1839a741a14d0d, - 0xd31045a8341ca07c,0x1ede48111209a050, - 0x83ea2b892091e44d,0x934aed0aab460432, - 0xa4e4b66b68b65d60,0xf81da84d5617853f, - 0xce1de40642e3f4b9,0x36251260ab9d668e, - 0x80d2ae83e9ce78f3,0xc1d72b7c6b426019, - 0xa1075a24e4421730,0xb24cf65b8612f81f, - 0xc94930ae1d529cfc,0xdee033f26797b627, - 0xfb9b7cd9a4a7443c,0x169840ef017da3b1, - 0x9d412e0806e88aa5,0x8e1f289560ee864e, - 0xc491798a08a2ad4e,0xf1a6f2bab92a27e2, - 0xf5b5d7ec8acb58a2,0xae10af696774b1db, - 0x9991a6f3d6bf1765,0xacca6da1e0a8ef29, - 0xbff610b0cc6edd3f,0x17fd090a58d32af3, - 0xeff394dcff8a948e,0xddfc4b4cef07f5b0, - 0x95f83d0a1fb69cd9,0x4abdaf101564f98e, - 0xbb764c4ca7a4440f,0x9d6d1ad41abe37f1, - 0xea53df5fd18d5513,0x84c86189216dc5ed, - 0x92746b9be2f8552c,0x32fd3cf5b4e49bb4, - 0xb7118682dbb66a77,0x3fbc8c33221dc2a1, - 0xe4d5e82392a40515,0xfabaf3feaa5334a, - 0x8f05b1163ba6832d,0x29cb4d87f2a7400e, - 0xb2c71d5bca9023f8,0x743e20e9ef511012, - 0xdf78e4b2bd342cf6,0x914da9246b255416, - 0x8bab8eefb6409c1a,0x1ad089b6c2f7548e, - 0xae9672aba3d0c320,0xa184ac2473b529b1, - 0xda3c0f568cc4f3e8,0xc9e5d72d90a2741e, - 0x8865899617fb1871,0x7e2fa67c7a658892, - 0xaa7eebfb9df9de8d,0xddbb901b98feeab7, - 0xd51ea6fa85785631,0x552a74227f3ea565, - 0x8533285c936b35de,0xd53a88958f87275f, - 0xa67ff273b8460356,0x8a892abaf368f137, - 0xd01fef10a657842c,0x2d2b7569b0432d85, - 0x8213f56a67f6b29b,0x9c3b29620e29fc73, - 0xa298f2c501f45f42,0x8349f3ba91b47b8f, - 
0xcb3f2f7642717713,0x241c70a936219a73, - 0xfe0efb53d30dd4d7,0xed238cd383aa0110, - 0x9ec95d1463e8a506,0xf4363804324a40aa, - 0xc67bb4597ce2ce48,0xb143c6053edcd0d5, - 0xf81aa16fdc1b81da,0xdd94b7868e94050a, - 0x9b10a4e5e9913128,0xca7cf2b4191c8326, - 0xc1d4ce1f63f57d72,0xfd1c2f611f63a3f0, - 0xf24a01a73cf2dccf,0xbc633b39673c8cec, - 0x976e41088617ca01,0xd5be0503e085d813, - 0xbd49d14aa79dbc82,0x4b2d8644d8a74e18, - 0xec9c459d51852ba2,0xddf8e7d60ed1219e, - 0x93e1ab8252f33b45,0xcabb90e5c942b503, - 0xb8da1662e7b00a17,0x3d6a751f3b936243, - 0xe7109bfba19c0c9d,0xcc512670a783ad4, - 0x906a617d450187e2,0x27fb2b80668b24c5, - 0xb484f9dc9641e9da,0xb1f9f660802dedf6, - 0xe1a63853bbd26451,0x5e7873f8a0396973, - 0x8d07e33455637eb2,0xdb0b487b6423e1e8, - 0xb049dc016abc5e5f,0x91ce1a9a3d2cda62, - 0xdc5c5301c56b75f7,0x7641a140cc7810fb, - 0x89b9b3e11b6329ba,0xa9e904c87fcb0a9d, - 0xac2820d9623bf429,0x546345fa9fbdcd44, - 0xd732290fbacaf133,0xa97c177947ad4095, - 0x867f59a9d4bed6c0,0x49ed8eabcccc485d, - 0xa81f301449ee8c70,0x5c68f256bfff5a74, - 0xd226fc195c6a2f8c,0x73832eec6fff3111, - 0x83585d8fd9c25db7,0xc831fd53c5ff7eab, - 0xa42e74f3d032f525,0xba3e7ca8b77f5e55, - 0xcd3a1230c43fb26f,0x28ce1bd2e55f35eb, - 0x80444b5e7aa7cf85,0x7980d163cf5b81b3, - 0xa0555e361951c366,0xd7e105bcc332621f, - 0xc86ab5c39fa63440,0x8dd9472bf3fefaa7, - 0xfa856334878fc150,0xb14f98f6f0feb951, - 0x9c935e00d4b9d8d2,0x6ed1bf9a569f33d3, - 0xc3b8358109e84f07,0xa862f80ec4700c8, - 0xf4a642e14c6262c8,0xcd27bb612758c0fa, - 0x98e7e9cccfbd7dbd,0x8038d51cb897789c, - 0xbf21e44003acdd2c,0xe0470a63e6bd56c3, - 0xeeea5d5004981478,0x1858ccfce06cac74, - 0x95527a5202df0ccb,0xf37801e0c43ebc8, - 0xbaa718e68396cffd,0xd30560258f54e6ba, - 0xe950df20247c83fd,0x47c6b82ef32a2069, - 0x91d28b7416cdd27e,0x4cdc331d57fa5441, - 0xb6472e511c81471d,0xe0133fe4adf8e952, - 0xe3d8f9e563a198e5,0x58180fddd97723a6, - 0x8e679c2f5e44ff8f,0x570f09eaa7ea7648,}; +constexpr uint64_t powers_template::power_of_five_128[number_of_entries]; + using powers = powers_template<>; -} +} 
// namespace fast_float #endif - #ifndef FASTFLOAT_DECIMAL_TO_BINARY_H #define FASTFLOAT_DECIMAL_TO_BINARY_H @@ -8808,7 +9763,7 @@ namespace fast_float { // low part corresponding to the least significant bits. // template -fastfloat_really_inline +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 value128 compute_product_approximation(int64_t q, uint64_t w) { const int index = 2 * int(q - powers::smallest_power_of_five); // For small values of q, e.g., q in [0,27], the answer is always exact because @@ -8839,9 +9794,9 @@ namespace detail { * where * p = log(5**q)/log(2) = q * log(5)/log(2) * - * For negative values of q in (-400,0), we have that + * For negative values of q in (-400,0), we have that * f = (((152170 + 65536) * q ) >> 16); - * is equal to + * is equal to * -ceil(p) + q * where * p = log(5**-q)/log(2) = -q * log(5)/log(2) @@ -8854,7 +9809,7 @@ namespace detail { // create an adjusted mantissa, biased by the invalid power2 // for significant digits already multiplied by 10 ** q. template -fastfloat_really_inline +fastfloat_really_inline FASTFLOAT_CONSTEXPR14 adjusted_mantissa compute_error_scaled(int64_t q, uint64_t w, int lz) noexcept { int hilz = int(w >> 63) ^ 1; adjusted_mantissa answer; @@ -8867,7 +9822,7 @@ adjusted_mantissa compute_error_scaled(int64_t q, uint64_t w, int lz) noexcept // w * 10 ** q, without rounding the representation up. // the power2 in the exponent will be adjusted by invalid_am_bias. template -fastfloat_really_inline +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa compute_error(int64_t q, uint64_t w) noexcept { int lz = leading_zeroes(w); w <<= lz; @@ -8881,7 +9836,7 @@ adjusted_mantissa compute_error(int64_t q, uint64_t w) noexcept { // return an adjusted_mantissa with a negative power of 2: the caller should recompute // in such cases. 
template -fastfloat_really_inline +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa compute_float(int64_t q, uint64_t w) noexcept { adjusted_mantissa answer; if ((w == 0) || (q < binary::smallest_power_of_ten())) { @@ -8908,23 +9863,19 @@ adjusted_mantissa compute_float(int64_t q, uint64_t w) noexcept { // 3. We might lose a bit due to the "upperbit" routine (result too small, requiring a shift) value128 product = compute_product_approximation(q, w); - if(product.low == 0xFFFFFFFFFFFFFFFF) { // could guard it further - // In some very rare cases, this could happen, in which case we might need a more accurate - // computation that what we can provide cheaply. This is very, very unlikely. - // - const bool inside_safe_exponent = (q >= -27) && (q <= 55); // always good because 5**q <2**128 when q>=0, - // and otherwise, for q<0, we have 5**-q<2**64 and the 128-bit reciprocal allows for exact computation. - if(!inside_safe_exponent) { - return compute_error_scaled(q, product.high, lz); - } - } + // The computed 'product' is always sufficient. + // Mathematical proof: + // Noble Mushtak and Daniel Lemire, Fast Number Parsing Without Fallback (to appear) + // See script/mushtak_lemire.py + // The "compute_product_approximation" function can be slightly slower than a branchless approach: // value128 product = compute_product(q, w); // but in practice, we can win big with the compute_product_approximation if its additional branch // is easily predicted. Which is best is data specific. int upperbit = int(product.high >> 63); + int shift = upperbit + 64 - binary::mantissa_explicit_bits() - 3; - answer.mantissa = product.high >> (upperbit + 64 - binary::mantissa_explicit_bits() - 3); + answer.mantissa = product.high >> shift; answer.power2 = int32_t(detail::power(int32_t(q)) + upperbit - lz - binary::minimum_exponent()); if (answer.power2 <= 0) { // we have a subnormal? 
@@ -8960,7 +9911,7 @@ adjusted_mantissa compute_float(int64_t q, uint64_t w) noexcept { // To be in-between two floats we need that in doing // answer.mantissa = product.high >> (upperbit + 64 - binary::mantissa_explicit_bits() - 3); // ... we dropped out only zeroes. But if this happened, then we can go back!!! - if((answer.mantissa << (upperbit + 64 - binary::mantissa_explicit_bits() - 3)) == product.high) { + if((answer.mantissa << shift) == product.high) { answer.mantissa &= ~uint64_t(1); // flip it so that we do not round up } } @@ -8984,7 +9935,6 @@ adjusted_mantissa compute_float(int64_t q, uint64_t w) noexcept { #endif - #ifndef FASTFLOAT_BIGINT_H #define FASTFLOAT_BIGINT_H @@ -9006,7 +9956,7 @@ namespace fast_float { // we might have platforms where `CHAR_BIT` is not 8, so let's avoid // doing `8 * sizeof(limb)`. #if defined(FASTFLOAT_64BIT) && !defined(__sparc) -#define FASTFLOAT_64BIT_LIMB +#define FASTFLOAT_64BIT_LIMB 1 typedef uint64_t limb; constexpr size_t limb_bits = 64; #else @@ -9039,27 +9989,27 @@ struct stackvec { stackvec &operator=(stackvec &&other) = delete; // create stack vector from existing limb span. - stackvec(limb_span s) { + FASTFLOAT_CONSTEXPR20 stackvec(limb_span s) { FASTFLOAT_ASSERT(try_extend(s)); } - limb& operator[](size_t index) noexcept { + FASTFLOAT_CONSTEXPR14 limb& operator[](size_t index) noexcept { FASTFLOAT_DEBUG_ASSERT(index < length); return data[index]; } - const limb& operator[](size_t index) const noexcept { + FASTFLOAT_CONSTEXPR14 const limb& operator[](size_t index) const noexcept { FASTFLOAT_DEBUG_ASSERT(index < length); return data[index]; } // index from the end of the container - const limb& rindex(size_t index) const noexcept { + FASTFLOAT_CONSTEXPR14 const limb& rindex(size_t index) const noexcept { FASTFLOAT_DEBUG_ASSERT(index < length); size_t rindex = length - index - 1; return data[rindex]; } // set the length, without bounds checking. 
- void set_len(size_t len) noexcept { + FASTFLOAT_CONSTEXPR14 void set_len(size_t len) noexcept { length = uint16_t(len); } constexpr size_t len() const noexcept { @@ -9072,12 +10022,12 @@ struct stackvec { return size; } // append item to vector, without bounds checking - void push_unchecked(limb value) noexcept { + FASTFLOAT_CONSTEXPR14 void push_unchecked(limb value) noexcept { data[length] = value; length++; } // append item to vector, returning if item was added - bool try_push(limb value) noexcept { + FASTFLOAT_CONSTEXPR14 bool try_push(limb value) noexcept { if (len() < capacity()) { push_unchecked(value); return true; @@ -9086,13 +10036,13 @@ struct stackvec { } } // add items to the vector, from a span, without bounds checking - void extend_unchecked(limb_span s) noexcept { + FASTFLOAT_CONSTEXPR20 void extend_unchecked(limb_span s) noexcept { limb* ptr = data + length; - ::memcpy((void*)ptr, (const void*)s.ptr, sizeof(limb) * s.len()); + std::copy_n(s.ptr, s.len(), ptr); set_len(len() + s.len()); } // try to add items to the vector, returning if items were added - bool try_extend(limb_span s) noexcept { + FASTFLOAT_CONSTEXPR20 bool try_extend(limb_span s) noexcept { if (len() + s.len() <= capacity()) { extend_unchecked(s); return true; @@ -9103,6 +10053,7 @@ struct stackvec { // resize the vector, without bounds checking // if the new size is longer than the vector, assign value to each // appended item. + FASTFLOAT_CONSTEXPR20 void resize_unchecked(size_t new_len, limb value) noexcept { if (new_len > len()) { size_t count = new_len - len(); @@ -9115,7 +10066,7 @@ struct stackvec { } } // try to resize the vector, returning if the vector was resized. - bool try_resize(size_t new_len, limb value) noexcept { + FASTFLOAT_CONSTEXPR20 bool try_resize(size_t new_len, limb value) noexcept { if (new_len > capacity()) { return false; } else { @@ -9126,7 +10077,7 @@ struct stackvec { // check if any limbs are non-zero after the given index. 
// this needs to be done in reverse order, since the index // is relative to the most significant limbs. - bool nonzero(size_t index) const noexcept { + FASTFLOAT_CONSTEXPR14 bool nonzero(size_t index) const noexcept { while (index < len()) { if (rindex(index) != 0) { return true; @@ -9136,27 +10087,27 @@ struct stackvec { return false; } // normalize the big integer, so most-significant zero limbs are removed. - void normalize() noexcept { + FASTFLOAT_CONSTEXPR14 void normalize() noexcept { while (len() > 0 && rindex(0) == 0) { length--; } } }; -fastfloat_really_inline +fastfloat_really_inline FASTFLOAT_CONSTEXPR14 uint64_t empty_hi64(bool& truncated) noexcept { truncated = false; return 0; } -fastfloat_really_inline +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint64_t uint64_hi64(uint64_t r0, bool& truncated) noexcept { truncated = false; int shl = leading_zeroes(r0); return r0 << shl; } -fastfloat_really_inline +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint64_t uint64_hi64(uint64_t r0, uint64_t r1, bool& truncated) noexcept { int shl = leading_zeroes(r0); if (shl == 0) { @@ -9169,19 +10120,19 @@ uint64_t uint64_hi64(uint64_t r0, uint64_t r1, bool& truncated) noexcept { } } -fastfloat_really_inline +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint64_t uint32_hi64(uint32_t r0, bool& truncated) noexcept { return uint64_hi64(r0, truncated); } -fastfloat_really_inline +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint64_t uint32_hi64(uint32_t r0, uint32_t r1, bool& truncated) noexcept { uint64_t x0 = r0; uint64_t x1 = r1; return uint64_hi64((x0 << 32) | x1, truncated); } -fastfloat_really_inline +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint64_t uint32_hi64(uint32_t r0, uint32_t r1, uint32_t r2, bool& truncated) noexcept { uint64_t x0 = r0; uint64_t x1 = r1; @@ -9193,15 +10144,16 @@ uint64_t uint32_hi64(uint32_t r0, uint32_t r1, uint32_t r2, bool& truncated) noe // we want an efficient operation. 
for msvc, where // we don't have built-in intrinsics, this is still // pretty fast. -fastfloat_really_inline +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 limb scalar_add(limb x, limb y, bool& overflow) noexcept { limb z; - // gcc and clang #if defined(__has_builtin) #if __has_builtin(__builtin_add_overflow) - overflow = __builtin_add_overflow(x, y, &z); - return z; + if (!cpp20_and_in_constexpr()) { + overflow = __builtin_add_overflow(x, y, &z); + return z; + } #endif #endif @@ -9212,7 +10164,7 @@ limb scalar_add(limb x, limb y, bool& overflow) noexcept { } // multiply two small integers, getting both the high and low bits. -fastfloat_really_inline +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 limb scalar_mul(limb x, limb y, limb& carry) noexcept { #ifdef FASTFLOAT_64BIT_LIMB #if defined(__SIZEOF_INT128__) @@ -9240,7 +10192,8 @@ limb scalar_mul(limb x, limb y, limb& carry) noexcept { // add scalar value to bigint starting from offset. // used in grade school multiplication template -inline bool small_add_from(stackvec& vec, limb y, size_t start) noexcept { +inline FASTFLOAT_CONSTEXPR20 +bool small_add_from(stackvec& vec, limb y, size_t start) noexcept { size_t index = start; limb carry = y; bool overflow; @@ -9257,13 +10210,15 @@ inline bool small_add_from(stackvec& vec, limb y, size_t start) noexcept { // add scalar value to bigint. template -fastfloat_really_inline bool small_add(stackvec& vec, limb y) noexcept { +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 +bool small_add(stackvec& vec, limb y) noexcept { return small_add_from(vec, y, 0); } // multiply bigint by scalar value. template -inline bool small_mul(stackvec& vec, limb y) noexcept { +inline FASTFLOAT_CONSTEXPR20 +bool small_mul(stackvec& vec, limb y) noexcept { limb carry = 0; for (size_t index = 0; index < vec.len(); index++) { vec[index] = scalar_mul(vec[index], y, carry); @@ -9277,6 +10232,7 @@ inline bool small_mul(stackvec& vec, limb y) noexcept { // add bigint to bigint starting from index. 
// used in grade school multiplication template +FASTFLOAT_CONSTEXPR20 bool large_add_from(stackvec& x, limb_span y, size_t start) noexcept { // the effective x buffer is from `xstart..x.len()`, so exit early // if we can't get that current range. @@ -9307,12 +10263,14 @@ bool large_add_from(stackvec& x, limb_span y, size_t start) noexcept { // add bigint to bigint. template -fastfloat_really_inline bool large_add_from(stackvec& x, limb_span y) noexcept { +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 +bool large_add_from(stackvec& x, limb_span y) noexcept { return large_add_from(x, y, 0); } // grade-school multiplication algorithm template +FASTFLOAT_CONSTEXPR20 bool long_mul(stackvec& x, limb_span y) noexcept { limb_span xs = limb_span(x.data, x.len()); stackvec z(xs); @@ -9341,6 +10299,7 @@ bool long_mul(stackvec& x, limb_span y) noexcept { // grade-school multiplication algorithm template +FASTFLOAT_CONSTEXPR20 bool large_mul(stackvec& x, limb_span y) noexcept { if (y.len() == 1) { FASTFLOAT_TRY(small_mul(x, y[0])); @@ -9350,21 +10309,52 @@ bool large_mul(stackvec& x, limb_span y) noexcept { return true; } +template +struct pow5_tables { + static constexpr uint32_t large_step = 135; + static constexpr uint64_t small_power_of_5[] = { + 1UL, 5UL, 25UL, 125UL, 625UL, 3125UL, 15625UL, 78125UL, 390625UL, + 1953125UL, 9765625UL, 48828125UL, 244140625UL, 1220703125UL, + 6103515625UL, 30517578125UL, 152587890625UL, 762939453125UL, + 3814697265625UL, 19073486328125UL, 95367431640625UL, 476837158203125UL, + 2384185791015625UL, 11920928955078125UL, 59604644775390625UL, + 298023223876953125UL, 1490116119384765625UL, 7450580596923828125UL, + }; +#ifdef FASTFLOAT_64BIT_LIMB + constexpr static limb large_power_of_5[] = { + 1414648277510068013UL, 9180637584431281687UL, 4539964771860779200UL, + 10482974169319127550UL, 198276706040285095UL}; +#else + constexpr static limb large_power_of_5[] = { + 4279965485U, 329373468U, 4020270615U, 2137533757U, 4287402176U, + 1057042919U, 
1071430142U, 2440757623U, 381945767U, 46164893U}; +#endif +}; + +template +constexpr uint32_t pow5_tables::large_step; + +template +constexpr uint64_t pow5_tables::small_power_of_5[]; + +template +constexpr limb pow5_tables::large_power_of_5[]; + // big integer type. implements a small subset of big integer // arithmetic, using simple algorithms since asymptotically // faster algorithms are slower for a small number of limbs. // all operations assume the big-integer is normalized. -struct bigint { +struct bigint : pow5_tables<> { // storage of the limbs, in little-endian order. stackvec vec; - bigint(): vec() {} + FASTFLOAT_CONSTEXPR20 bigint(): vec() {} bigint(const bigint &) = delete; bigint &operator=(const bigint &) = delete; bigint(bigint &&) = delete; bigint &operator=(bigint &&other) = delete; - bigint(uint64_t value): vec() { + FASTFLOAT_CONSTEXPR20 bigint(uint64_t value): vec() { #ifdef FASTFLOAT_64BIT_LIMB vec.push_unchecked(value); #else @@ -9376,7 +10366,7 @@ struct bigint { // get the high 64 bits from the vector, and if bits were truncated. // this is to get the significant digits for the float. - uint64_t hi64(bool& truncated) const noexcept { + FASTFLOAT_CONSTEXPR20 uint64_t hi64(bool& truncated) const noexcept { #ifdef FASTFLOAT_64BIT_LIMB if (vec.len() == 0) { return empty_hi64(truncated); @@ -9408,7 +10398,7 @@ struct bigint { // positive, this is larger, otherwise they are equal. // the limbs are stored in little-endian order, so we // must compare the limbs in ever order. - int compare(const bigint& other) const noexcept { + FASTFLOAT_CONSTEXPR20 int compare(const bigint& other) const noexcept { if (vec.len() > other.vec.len()) { return 1; } else if (vec.len() < other.vec.len()) { @@ -9429,7 +10419,7 @@ struct bigint { // shift left each limb n bits, carrying over to the new limb // returns true if we were able to shift all the digits. 
- bool shl_bits(size_t n) noexcept { + FASTFLOAT_CONSTEXPR20 bool shl_bits(size_t n) noexcept { // Internally, for each item, we shift left by n, and add the previous // right shifted limb-bits. // For example, we transform (for u8) shifted left 2, to: @@ -9455,7 +10445,7 @@ struct bigint { } // move the limbs left by `n` limbs. - bool shl_limbs(size_t n) noexcept { + FASTFLOAT_CONSTEXPR20 bool shl_limbs(size_t n) noexcept { FASTFLOAT_DEBUG_ASSERT(n != 0); if (n + vec.len() > vec.capacity()) { return false; @@ -9463,7 +10453,7 @@ struct bigint { // move limbs limb* dst = vec.data + n; const limb* src = vec.data; - ::memmove(dst, src, sizeof(limb) * vec.len()); + std::copy_backward(src, src + vec.len(), dst + vec.len()); // fill in empty limbs limb* first = vec.data; limb* last = first + n; @@ -9476,7 +10466,7 @@ struct bigint { } // move the limbs left by `n` bits. - bool shl(size_t n) noexcept { + FASTFLOAT_CONSTEXPR20 bool shl(size_t n) noexcept { size_t rem = n % limb_bits; size_t div = n / limb_bits; if (rem != 0) { @@ -9489,7 +10479,7 @@ struct bigint { } // get the number of leading zeros in the bigint. - int ctlz() const noexcept { + FASTFLOAT_CONSTEXPR20 int ctlz() const noexcept { if (vec.is_empty()) { return 0; } else { @@ -9504,45 +10494,27 @@ struct bigint { } // get the number of bits in the bigint. - int bit_length() const noexcept { + FASTFLOAT_CONSTEXPR20 int bit_length() const noexcept { int lz = ctlz(); return int(limb_bits * vec.len()) - lz; } - bool mul(limb y) noexcept { + FASTFLOAT_CONSTEXPR20 bool mul(limb y) noexcept { return small_mul(vec, y); } - bool add(limb y) noexcept { + FASTFLOAT_CONSTEXPR20 bool add(limb y) noexcept { return small_add(vec, y); } // multiply as if by 2 raised to a power. - bool pow2(uint32_t exp) noexcept { + FASTFLOAT_CONSTEXPR20 bool pow2(uint32_t exp) noexcept { return shl(exp); } // multiply as if by 5 raised to a power. 
- bool pow5(uint32_t exp) noexcept { + FASTFLOAT_CONSTEXPR20 bool pow5(uint32_t exp) noexcept { // multiply by a power of 5 - static constexpr uint32_t large_step = 135; - static constexpr uint64_t small_power_of_5[] = { - 1UL, 5UL, 25UL, 125UL, 625UL, 3125UL, 15625UL, 78125UL, 390625UL, - 1953125UL, 9765625UL, 48828125UL, 244140625UL, 1220703125UL, - 6103515625UL, 30517578125UL, 152587890625UL, 762939453125UL, - 3814697265625UL, 19073486328125UL, 95367431640625UL, 476837158203125UL, - 2384185791015625UL, 11920928955078125UL, 59604644775390625UL, - 298023223876953125UL, 1490116119384765625UL, 7450580596923828125UL, - }; -#ifdef FASTFLOAT_64BIT_LIMB - constexpr static limb large_power_of_5[] = { - 1414648277510068013UL, 9180637584431281687UL, 4539964771860779200UL, - 10482974169319127550UL, 198276706040285095UL}; -#else - constexpr static limb large_power_of_5[] = { - 4279965485U, 329373468U, 4020270615U, 2137533757U, 4287402176U, - 1057042919U, 1071430142U, 2440757623U, 381945767U, 46164893U}; -#endif size_t large_length = sizeof(large_power_of_5) / sizeof(limb); limb_span large = limb_span(large_power_of_5, large_length); while (exp >= large_step) { @@ -9561,14 +10533,19 @@ struct bigint { exp -= small_step; } if (exp != 0) { - FASTFLOAT_TRY(small_mul(vec, limb(small_power_of_5[exp]))); + // Work around clang bug https://godbolt.org/z/zedh7rrhc + // This is similar to https://github.com/llvm/llvm-project/issues/47746, + // except the workaround described there don't work here + FASTFLOAT_TRY( + small_mul(vec, limb(((void)small_power_of_5[0], small_power_of_5[exp]))) + ); } return true; } // multiply as if by 10 raised to a power. 
- bool pow10(uint32_t exp) noexcept { + FASTFLOAT_CONSTEXPR20 bool pow10(uint32_t exp) noexcept { FASTFLOAT_TRY(pow5(exp)); return pow2(exp); } @@ -9578,12 +10555,11 @@ struct bigint { #endif - -#ifndef FASTFLOAT_ASCII_NUMBER_H -#define FASTFLOAT_ASCII_NUMBER_H +#ifndef FASTFLOAT_DIGIT_COMPARISON_H +#define FASTFLOAT_DIGIT_COMPARISON_H //included above: -//#include +//#include //included above: //#include //included above: @@ -9594,333 +10570,90 @@ struct bigint { namespace fast_float { -// Next function can be micro-optimized, but compilers are entirely -// able to optimize it well. -fastfloat_really_inline bool is_integer(char c) noexcept { return c >= '0' && c <= '9'; } +// 1e0 to 1e19 +constexpr static uint64_t powers_of_ten_uint64[] = { + 1UL, 10UL, 100UL, 1000UL, 10000UL, 100000UL, 1000000UL, 10000000UL, 100000000UL, + 1000000000UL, 10000000000UL, 100000000000UL, 1000000000000UL, 10000000000000UL, + 100000000000000UL, 1000000000000000UL, 10000000000000000UL, 100000000000000000UL, + 1000000000000000000UL, 10000000000000000000UL}; -fastfloat_really_inline uint64_t byteswap(uint64_t val) { - return (val & 0xFF00000000000000) >> 56 - | (val & 0x00FF000000000000) >> 40 - | (val & 0x0000FF0000000000) >> 24 - | (val & 0x000000FF00000000) >> 8 - | (val & 0x00000000FF000000) << 8 - | (val & 0x0000000000FF0000) << 24 - | (val & 0x000000000000FF00) << 40 - | (val & 0x00000000000000FF) << 56; +// calculate the exponent, in scientific notation, of the number. +// this algorithm is not even close to optimized, but it has no practical +// effect on performance: in order to have a faster algorithm, we'd need +// to slow down performance for faster algorithms, and this is still fast. 
+template +fastfloat_really_inline FASTFLOAT_CONSTEXPR14 +int32_t scientific_exponent(parsed_number_string_t & num) noexcept { + uint64_t mantissa = num.mantissa; + int32_t exponent = int32_t(num.exponent); + while (mantissa >= 10000) { + mantissa /= 10000; + exponent += 4; + } + while (mantissa >= 100) { + mantissa /= 100; + exponent += 2; + } + while (mantissa >= 10) { + mantissa /= 10; + exponent += 1; + } + return exponent; } -fastfloat_really_inline uint64_t read_u64(const char *chars) { - uint64_t val; - ::memcpy(&val, chars, sizeof(uint64_t)); -#if FASTFLOAT_IS_BIG_ENDIAN == 1 - // Need to read as-if the number was in little-endian order. - val = byteswap(val); -#endif - return val; -} +// this converts a native floating-point number to an extended-precision float. +template +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 +adjusted_mantissa to_extended(T value) noexcept { + using equiv_uint = typename binary_format::equiv_uint; + constexpr equiv_uint exponent_mask = binary_format::exponent_mask(); + constexpr equiv_uint mantissa_mask = binary_format::mantissa_mask(); + constexpr equiv_uint hidden_bit_mask = binary_format::hidden_bit_mask(); -fastfloat_really_inline void write_u64(uint8_t *chars, uint64_t val) { -#if FASTFLOAT_IS_BIG_ENDIAN == 1 - // Need to read as-if the number was in little-endian order. 
- val = byteswap(val); + adjusted_mantissa am; + int32_t bias = binary_format::mantissa_explicit_bits() - binary_format::minimum_exponent(); + equiv_uint bits; +#if FASTFLOAT_HAS_BIT_CAST + bits = std::bit_cast(value); +#else + ::memcpy(&bits, &value, sizeof(T)); #endif - ::memcpy(chars, &val, sizeof(uint64_t)); -} - -// credit @aqrit -fastfloat_really_inline uint32_t parse_eight_digits_unrolled(uint64_t val) { - const uint64_t mask = 0x000000FF000000FF; - const uint64_t mul1 = 0x000F424000000064; // 100 + (1000000ULL << 32) - const uint64_t mul2 = 0x0000271000000001; // 1 + (10000ULL << 32) - val -= 0x3030303030303030; - val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8; - val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32; - return uint32_t(val); -} - -fastfloat_really_inline uint32_t parse_eight_digits_unrolled(const char *chars) noexcept { - return parse_eight_digits_unrolled(read_u64(chars)); -} + if ((bits & exponent_mask) == 0) { + // denormal + am.power2 = 1 - bias; + am.mantissa = bits & mantissa_mask; + } else { + // normal + am.power2 = int32_t((bits & exponent_mask) >> binary_format::mantissa_explicit_bits()); + am.power2 -= bias; + am.mantissa = (bits & mantissa_mask) | hidden_bit_mask; + } -// credit @aqrit -fastfloat_really_inline bool is_made_of_eight_digits_fast(uint64_t val) noexcept { - return !((((val + 0x4646464646464646) | (val - 0x3030303030303030)) & - 0x8080808080808080)); + return am; } -fastfloat_really_inline bool is_made_of_eight_digits_fast(const char *chars) noexcept { - return is_made_of_eight_digits_fast(read_u64(chars)); -} - -typedef span byte_span; - -struct parsed_number_string { - int64_t exponent{0}; - uint64_t mantissa{0}; - const char *lastmatch{nullptr}; - bool negative{false}; - bool valid{false}; - bool too_many_digits{false}; - // contains the range of the significant digits - byte_span integer{}; // non-nullable - byte_span fraction{}; // nullable -}; - -// Assuming that you use no more than 19 
digits, this will -// parse an ASCII string. -fastfloat_really_inline -parsed_number_string parse_number_string(const char *p, const char *pend, parse_options options) noexcept { - const chars_format fmt = options.format; - const char decimal_point = options.decimal_point; - - parsed_number_string answer; - answer.valid = false; - answer.too_many_digits = false; - answer.negative = (*p == '-'); - if (*p == '-') { // C++17 20.19.3.(7.1) explicitly forbids '+' sign here - ++p; - if (p == pend) { - return answer; - } - if (!is_integer(*p) && (*p != decimal_point)) { // a sign must be followed by an integer or the dot - return answer; - } - } - const char *const start_digits = p; - - uint64_t i = 0; // an unsigned int avoids signed overflows (which are bad) - - while ((std::distance(p, pend) >= 8) && is_made_of_eight_digits_fast(p)) { - i = i * 100000000 + parse_eight_digits_unrolled(p); // in rare cases, this will overflow, but that's ok - p += 8; - } - while ((p != pend) && is_integer(*p)) { - // a multiplication by 10 is cheaper than an arbitrary integer - // multiplication - i = 10 * i + - uint64_t(*p - '0'); // might overflow, we will handle the overflow later - ++p; - } - const char *const end_of_integer_part = p; - int64_t digit_count = int64_t(end_of_integer_part - start_digits); - answer.integer = byte_span(start_digits, size_t(digit_count)); - int64_t exponent = 0; - if ((p != pend) && (*p == decimal_point)) { - ++p; - const char* before = p; - // can occur at most twice without overflowing, but let it occur more, since - // for integers with many digits, digit parsing is the primary bottleneck. 
- while ((std::distance(p, pend) >= 8) && is_made_of_eight_digits_fast(p)) { - i = i * 100000000 + parse_eight_digits_unrolled(p); // in rare cases, this will overflow, but that's ok - p += 8; - } - while ((p != pend) && is_integer(*p)) { - uint8_t digit = uint8_t(*p - '0'); - ++p; - i = i * 10 + digit; // in rare cases, this will overflow, but that's ok - } - exponent = before - p; - answer.fraction = byte_span(before, size_t(p - before)); - digit_count -= exponent; - } - // we must have encountered at least one integer! - if (digit_count == 0) { - return answer; - } - int64_t exp_number = 0; // explicit exponential part - if ((fmt & chars_format::scientific) && (p != pend) && (('e' == *p) || ('E' == *p))) { - const char * location_of_e = p; - ++p; - bool neg_exp = false; - if ((p != pend) && ('-' == *p)) { - neg_exp = true; - ++p; - } else if ((p != pend) && ('+' == *p)) { // '+' on exponent is allowed by C++17 20.19.3.(7.1) - ++p; - } - if ((p == pend) || !is_integer(*p)) { - if(!(fmt & chars_format::fixed)) { - // We are in error. - return answer; - } - // Otherwise, we will be ignoring the 'e'. - p = location_of_e; - } else { - while ((p != pend) && is_integer(*p)) { - uint8_t digit = uint8_t(*p - '0'); - if (exp_number < 0x10000000) { - exp_number = 10 * exp_number + digit; - } - ++p; - } - if(neg_exp) { exp_number = - exp_number; } - exponent += exp_number; - } - } else { - // If it scientific and not fixed, we have to bail out. - if((fmt & chars_format::scientific) && !(fmt & chars_format::fixed)) { return answer; } - } - answer.lastmatch = p; - answer.valid = true; - - // If we frequently had to deal with long strings of digits, - // we could extend our code by using a 128-bit integer instead - // of a 64-bit integer. However, this is uncommon. - // - // We can deal with up to 19 digits. - if (digit_count > 19) { // this is uncommon - // It is possible that the integer had an overflow. - // We have to handle the case where we have 0.0000somenumber. 
- // We need to be mindful of the case where we only have zeroes... - // E.g., 0.000000000...000. - const char *start = start_digits; - while ((start != pend) && (*start == '0' || *start == decimal_point)) { - if(*start == '0') { digit_count --; } - start++; - } - if (digit_count > 19) { - answer.too_many_digits = true; - // Let us start again, this time, avoiding overflows. - // We don't need to check if is_integer, since we use the - // pre-tokenized spans from above. - i = 0; - p = answer.integer.ptr; - const char* int_end = p + answer.integer.len(); - const uint64_t minimal_nineteen_digit_integer{1000000000000000000}; - while((i < minimal_nineteen_digit_integer) && (p != int_end)) { - i = i * 10 + uint64_t(*p - '0'); - ++p; - } - if (i >= minimal_nineteen_digit_integer) { // We have a big integers - exponent = end_of_integer_part - p + exp_number; - } else { // We have a value with a fractional component. - p = answer.fraction.ptr; - const char* frac_end = p + answer.fraction.len(); - while((i < minimal_nineteen_digit_integer) && (p != frac_end)) { - i = i * 10 + uint64_t(*p - '0'); - ++p; - } - exponent = answer.fraction.ptr - p + exp_number; - } - // We have now corrected both exponent and i, to a truncated value - } - } - answer.exponent = exponent; - answer.mantissa = i; - return answer; -} - -} // namespace fast_float - -#endif - - -#ifndef FASTFLOAT_DIGIT_COMPARISON_H -#define FASTFLOAT_DIGIT_COMPARISON_H - -//included above: -//#include -//included above: -//#include -//included above: -//#include -//included above: -//#include - - -namespace fast_float { - -// 1e0 to 1e19 -constexpr static uint64_t powers_of_ten_uint64[] = { - 1UL, 10UL, 100UL, 1000UL, 10000UL, 100000UL, 1000000UL, 10000000UL, 100000000UL, - 1000000000UL, 10000000000UL, 100000000000UL, 1000000000000UL, 10000000000000UL, - 100000000000000UL, 1000000000000000UL, 10000000000000000UL, 100000000000000000UL, - 1000000000000000000UL, 10000000000000000000UL}; - -// calculate the exponent, in 
scientific notation, of the number. -// this algorithm is not even close to optimized, but it has no practical -// effect on performance: in order to have a faster algorithm, we'd need -// to slow down performance for faster algorithms, and this is still fast. -fastfloat_really_inline int32_t scientific_exponent(parsed_number_string& num) noexcept { - uint64_t mantissa = num.mantissa; - int32_t exponent = int32_t(num.exponent); - while (mantissa >= 10000) { - mantissa /= 10000; - exponent += 4; - } - while (mantissa >= 100) { - mantissa /= 100; - exponent += 2; - } - while (mantissa >= 10) { - mantissa /= 10; - exponent += 1; - } - return exponent; -} - -// this converts a native floating-point number to an extended-precision float. -template -fastfloat_really_inline adjusted_mantissa to_extended(T value) noexcept { - adjusted_mantissa am; - int32_t bias = binary_format::mantissa_explicit_bits() - binary_format::minimum_exponent(); - if (std::is_same::value) { - constexpr uint32_t exponent_mask = 0x7F800000; - constexpr uint32_t mantissa_mask = 0x007FFFFF; - constexpr uint64_t hidden_bit_mask = 0x00800000; - uint32_t bits; - ::memcpy(&bits, &value, sizeof(T)); - if ((bits & exponent_mask) == 0) { - // denormal - am.power2 = 1 - bias; - am.mantissa = bits & mantissa_mask; - } else { - // normal - am.power2 = int32_t((bits & exponent_mask) >> binary_format::mantissa_explicit_bits()); - am.power2 -= bias; - am.mantissa = (bits & mantissa_mask) | hidden_bit_mask; - } - } else { - constexpr uint64_t exponent_mask = 0x7FF0000000000000; - constexpr uint64_t mantissa_mask = 0x000FFFFFFFFFFFFF; - constexpr uint64_t hidden_bit_mask = 0x0010000000000000; - uint64_t bits; - ::memcpy(&bits, &value, sizeof(T)); - if ((bits & exponent_mask) == 0) { - // denormal - am.power2 = 1 - bias; - am.mantissa = bits & mantissa_mask; - } else { - // normal - am.power2 = int32_t((bits & exponent_mask) >> binary_format::mantissa_explicit_bits()); - am.power2 -= bias; - am.mantissa = (bits & 
mantissa_mask) | hidden_bit_mask; - } - } - - return am; -} - -// get the extended precision value of the halfway point between b and b+u. -// we are given a native float that represents b, so we need to adjust it -// halfway between b and b+u. -template -fastfloat_really_inline adjusted_mantissa to_extended_halfway(T value) noexcept { - adjusted_mantissa am = to_extended(value); - am.mantissa <<= 1; - am.mantissa += 1; - am.power2 -= 1; - return am; +// get the extended precision value of the halfway point between b and b+u. +// we are given a native float that represents b, so we need to adjust it +// halfway between b and b+u. +template +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 +adjusted_mantissa to_extended_halfway(T value) noexcept { + adjusted_mantissa am = to_extended(value); + am.mantissa <<= 1; + am.mantissa += 1; + am.power2 -= 1; + return am; } // round an extended-precision float to the nearest machine float. template -fastfloat_really_inline void round(adjusted_mantissa& am, callback cb) noexcept { +fastfloat_really_inline FASTFLOAT_CONSTEXPR14 +void round(adjusted_mantissa& am, callback cb) noexcept { int32_t mantissa_shift = 64 - binary_format::mantissa_explicit_bits() - 1; if (-am.power2 >= mantissa_shift) { // have a denormal float int32_t shift = -am.power2 + 1; - cb(am, std::min(shift, 64)); + cb(am, std::min(shift, 64)); // check for round-up: if rounding-nearest carried us to the hidden bit. am.power2 = (am.mantissa < (uint64_t(1) << binary_format::mantissa_explicit_bits())) ? 
0 : 1; return; @@ -9944,23 +10677,19 @@ fastfloat_really_inline void round(adjusted_mantissa& am, callback cb) noexcept } template -fastfloat_really_inline +fastfloat_really_inline FASTFLOAT_CONSTEXPR14 void round_nearest_tie_even(adjusted_mantissa& am, int32_t shift, callback cb) noexcept { - uint64_t mask; - uint64_t halfway; - if (shift == 64) { - mask = UINT64_MAX; - } else { - mask = (uint64_t(1) << shift) - 1; - } - if (shift == 0) { - halfway = 0; - } else { - halfway = uint64_t(1) << (shift - 1); - } + const uint64_t mask + = (shift == 64) + ? UINT64_MAX + : (uint64_t(1) << shift) - 1; + const uint64_t halfway + = (shift == 0) + ? 0 + : uint64_t(1) << (shift - 1); uint64_t truncated_bits = am.mantissa & mask; - uint64_t is_above = truncated_bits > halfway; - uint64_t is_halfway = truncated_bits == halfway; + bool is_above = truncated_bits > halfway; + bool is_halfway = truncated_bits == halfway; // shift digits into position if (shift == 64) { @@ -9974,7 +10703,8 @@ void round_nearest_tie_even(adjusted_mantissa& am, int32_t shift, callback cb) n am.mantissa += uint64_t(cb(is_odd, is_halfway, is_above)); } -fastfloat_really_inline void round_down(adjusted_mantissa& am, int32_t shift) noexcept { +fastfloat_really_inline FASTFLOAT_CONSTEXPR14 +void round_down(adjusted_mantissa& am, int32_t shift) noexcept { if (shift == 64) { am.mantissa = 0; } else { @@ -9982,18 +10712,19 @@ fastfloat_really_inline void round_down(adjusted_mantissa& am, int32_t shift) no } am.power2 += shift; } - -fastfloat_really_inline void skip_zeros(const char*& first, const char* last) noexcept { +template +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 +void skip_zeros(UC const * & first, UC const * last) noexcept { uint64_t val; - while (std::distance(first, last) >= 8) { + while (!cpp20_and_in_constexpr() && std::distance(first, last) >= int_cmp_len()) { ::memcpy(&val, first, sizeof(uint64_t)); - if (val != 0x3030303030303030) { + if (val != int_cmp_zeros()) { break; } - first += 8; + 
first += int_cmp_len(); } while (first != last) { - if (*first != '0') { + if (*first != UC('0')) { break; } first++; @@ -10002,52 +10733,59 @@ fastfloat_really_inline void skip_zeros(const char*& first, const char* last) no // determine if any non-zero digits were truncated. // all characters must be valid digits. -fastfloat_really_inline bool is_truncated(const char* first, const char* last) noexcept { +template +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 +bool is_truncated(UC const * first, UC const * last) noexcept { // do 8-bit optimizations, can just compare to 8 literal 0s. uint64_t val; - while (std::distance(first, last) >= 8) { + while (!cpp20_and_in_constexpr() && std::distance(first, last) >= int_cmp_len()) { ::memcpy(&val, first, sizeof(uint64_t)); - if (val != 0x3030303030303030) { + if (val != int_cmp_zeros()) { return true; } - first += 8; + first += int_cmp_len(); } while (first != last) { - if (*first != '0') { + if (*first != UC('0')) { return true; } - first++; + ++first; } return false; } - -fastfloat_really_inline bool is_truncated(byte_span s) noexcept { +template +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 +bool is_truncated(span s) noexcept { return is_truncated(s.ptr, s.ptr + s.len()); } -fastfloat_really_inline -void parse_eight_digits(const char*& p, limb& value, size_t& counter, size_t& count) noexcept { + +template +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 +void parse_eight_digits(const UC*& p, limb& value, size_t& counter, size_t& count) noexcept { value = value * 100000000 + parse_eight_digits_unrolled(p); p += 8; counter += 8; count += 8; } -fastfloat_really_inline -void parse_one_digit(const char*& p, limb& value, size_t& counter, size_t& count) noexcept { - value = value * 10 + limb(*p - '0'); +template +fastfloat_really_inline FASTFLOAT_CONSTEXPR14 +void parse_one_digit(UC const *& p, limb& value, size_t& counter, size_t& count) noexcept { + value = value * 10 + limb(*p - UC('0')); p++; counter++; count++; } 
-fastfloat_really_inline +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void add_native(bigint& big, limb power, limb value) noexcept { big.mul(power); big.add(value); } -fastfloat_really_inline void round_up_bigint(bigint& big, size_t& count) noexcept { +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 +void round_up_bigint(bigint& big, size_t& count) noexcept { // need to round-up the digits, but need to avoid rounding // ....9999 to ...10000, which could cause a false halfway point. add_native(big, 10, 1); @@ -10055,7 +10793,9 @@ fastfloat_really_inline void round_up_bigint(bigint& big, size_t& count) noexcep } // parse the significant digits into a big integer -inline void parse_mantissa(bigint& result, parsed_number_string& num, size_t max_digits, size_t& digits) noexcept { +template +inline FASTFLOAT_CONSTEXPR20 +void parse_mantissa(bigint& result, parsed_number_string_t& num, size_t max_digits, size_t& digits) noexcept { // try to minimize the number of big integer and scalar multiplication. // therefore, try to parse 8 digits at a time, and multiply by the largest // scalar value (9 or 19 digits) for each step. @@ -10069,8 +10809,8 @@ inline void parse_mantissa(bigint& result, parsed_number_string& num, size_t max #endif // process all integer digits. 
- const char* p = num.integer.ptr; - const char* pend = p + num.integer.len(); + UC const * p = num.integer.ptr; + UC const * pend = p + num.integer.len(); skip_zeros(p, pend); // process all digits, in increments of step per loop while (p != pend) { @@ -10135,7 +10875,8 @@ inline void parse_mantissa(bigint& result, parsed_number_string& num, size_t max } template -inline adjusted_mantissa positive_digit_comp(bigint& bigmant, int32_t exponent) noexcept { +inline FASTFLOAT_CONSTEXPR20 +adjusted_mantissa positive_digit_comp(bigint& bigmant, int32_t exponent) noexcept { FASTFLOAT_ASSERT(bigmant.pow10(uint32_t(exponent))); adjusted_mantissa answer; bool truncated; @@ -10158,7 +10899,8 @@ inline adjusted_mantissa positive_digit_comp(bigint& bigmant, int32_t exponent) // we then need to scale by `2^(f- e)`, and then the two significant digits // are of the same magnitude. template -inline adjusted_mantissa negative_digit_comp(bigint& bigmant, adjusted_mantissa am, int32_t exponent) noexcept { +inline FASTFLOAT_CONSTEXPR20 +adjusted_mantissa negative_digit_comp(bigint& bigmant, adjusted_mantissa am, int32_t exponent) noexcept { bigint& real_digits = bigmant; int32_t real_exp = exponent; @@ -10217,8 +10959,9 @@ inline adjusted_mantissa negative_digit_comp(bigint& bigmant, adjusted_mantissa // `b` as a big-integer type, scaled to the same binary exponent as // the actual digits. we then compare the big integer representations // of both, and use that to direct rounding. 
-template -inline adjusted_mantissa digit_comp(parsed_number_string& num, adjusted_mantissa am) noexcept { +template +inline FASTFLOAT_CONSTEXPR20 +adjusted_mantissa digit_comp(parsed_number_string_t& num, adjusted_mantissa am) noexcept { // remove the invalid exponent bias am.power2 -= invalid_am_bias; @@ -10240,7 +10983,6 @@ inline adjusted_mantissa digit_comp(parsed_number_string& num, adjusted_mantissa #endif - #ifndef FASTFLOAT_PARSE_NUMBER_H #define FASTFLOAT_PARSE_NUMBER_H @@ -10253,7 +10995,6 @@ inline adjusted_mantissa digit_comp(parsed_number_string& num, adjusted_mantissa //#include //included above: //#include - namespace fast_float { @@ -10263,35 +11004,41 @@ namespace detail { * The case comparisons could be made much faster given that we know that the * strings a null-free and fixed. **/ -template -from_chars_result parse_infnan(const char *first, const char *last, T &value) noexcept { - from_chars_result answer; +template +from_chars_result_t FASTFLOAT_CONSTEXPR14 +parse_infnan(UC const * first, UC const * last, T &value) noexcept { + from_chars_result_t answer{}; answer.ptr = first; answer.ec = std::errc(); // be optimistic bool minusSign = false; - if (*first == '-') { // assume first < last, so dereference without checks; C++17 20.19.3.(7.1) explicitly forbids '+' here + if (*first == UC('-')) { // assume first < last, so dereference without checks; C++17 20.19.3.(7.1) explicitly forbids '+' here minusSign = true; ++first; } +#ifdef FASTFLOAT_ALLOWS_LEADING_PLUS // disabled by default + if (*first == UC('+')) { + ++first; + } +#endif if (last - first >= 3) { - if (fastfloat_strncasecmp(first, "nan", 3)) { + if (fastfloat_strncasecmp(first, str_const_nan(), 3)) { answer.ptr = (first += 3); value = minusSign ? -std::numeric_limits::quiet_NaN() : std::numeric_limits::quiet_NaN(); // Check for possible nan(n-char-seq-opt), C++17 20.19.3.7, C11 7.20.1.3.3. At least MSVC produces nan(ind) and nan(snan). 
- if(first != last && *first == '(') { - for(const char* ptr = first + 1; ptr != last; ++ptr) { - if (*ptr == ')') { + if(first != last && *first == UC('(')) { + for(UC const * ptr = first + 1; ptr != last; ++ptr) { + if (*ptr == UC(')')) { answer.ptr = ptr + 1; // valid nan(n-char-seq-opt) break; } - else if(!(('a' <= *ptr && *ptr <= 'z') || ('A' <= *ptr && *ptr <= 'Z') || ('0' <= *ptr && *ptr <= '9') || *ptr == '_')) + else if(!((UC('a') <= *ptr && *ptr <= UC('z')) || (UC('A') <= *ptr && *ptr <= UC('Z')) || (UC('0') <= *ptr && *ptr <= UC('9')) || *ptr == UC('_'))) break; // forbidden char, not nan(n-char-seq-opt) } } return answer; } - if (fastfloat_strncasecmp(first, "inf", 3)) { - if ((last - first >= 8) && fastfloat_strncasecmp(first + 3, "inity", 5)) { + if (fastfloat_strncasecmp(first, str_const_inf(), 3)) { + if ((last - first >= 8) && fastfloat_strncasecmp(first + 3, str_const_inf() + 3, 5)) { answer.ptr = first + 8; } else { answer.ptr = first + 3; @@ -10304,40 +11051,183 @@ from_chars_result parse_infnan(const char *first, const char *last, T &value) n return answer; } +/** + * Returns true if the floating-pointing rounding mode is to 'nearest'. + * It is the default on most system. This function is meant to be inexpensive. + * Credit : @mwalcott3 + */ +fastfloat_really_inline bool rounds_to_nearest() noexcept { + // https://lemire.me/blog/2020/06/26/gcc-not-nearest/ +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + return false; +#endif + // See + // A fast function to check your floating-point rounding mode + // https://lemire.me/blog/2022/11/16/a-fast-function-to-check-your-floating-point-rounding-mode/ + // + // This function is meant to be equivalent to : + // prior: #include + // return fegetround() == FE_TONEAREST; + // However, it is expected to be much faster than the fegetround() + // function call. + // + // The volatile keywoard prevents the compiler from computing the function + // at compile-time. 
+ // There might be other ways to prevent compile-time optimizations (e.g., asm). + // The value does not need to be std::numeric_limits::min(), any small + // value so that 1 + x should round to 1 would do (after accounting for excess + // precision, as in 387 instructions). + static volatile float fmin = std::numeric_limits::min(); + float fmini = fmin; // we copy it so that it gets loaded at most once. + // + // Explanation: + // Only when fegetround() == FE_TONEAREST do we have that + // fmin + 1.0f == 1.0f - fmin. + // + // FE_UPWARD: + // fmin + 1.0f > 1 + // 1.0f - fmin == 1 + // + // FE_DOWNWARD or FE_TOWARDZERO: + // fmin + 1.0f == 1 + // 1.0f - fmin < 1 + // + // Note: This may fail to be accurate if fast-math has been + // enabled, as rounding conventions may not apply. + #ifdef FASTFLOAT_VISUAL_STUDIO + # pragma warning(push) + // todo: is there a VS warning? + // see https://stackoverflow.com/questions/46079446/is-there-a-warning-for-floating-point-equality-checking-in-visual-studio-2013 + #elif defined(__clang__) + # pragma clang diagnostic push + # pragma clang diagnostic ignored "-Wfloat-equal" + #elif defined(__GNUC__) + # pragma GCC diagnostic push + # pragma GCC diagnostic ignored "-Wfloat-equal" + #endif + return (fmini + 1.0f == 1.0f - fmini); + #ifdef FASTFLOAT_VISUAL_STUDIO + # pragma warning(pop) + #elif defined(__clang__) + # pragma clang diagnostic pop + #elif defined(__GNUC__) + # pragma GCC diagnostic pop + #endif +} + } // namespace detail -template -from_chars_result from_chars(const char *first, const char *last, +template +struct from_chars_caller +{ + template + FASTFLOAT_CONSTEXPR20 + static from_chars_result_t call(UC const * first, UC const * last, + T &value, parse_options_t options) noexcept { + return from_chars_advanced(first, last, value, options); + } +}; + +#if __STDCPP_FLOAT32_T__ == 1 +template <> +struct from_chars_caller +{ + template + FASTFLOAT_CONSTEXPR20 + static from_chars_result_t call(UC const * first, UC const 
* last, + std::float32_t &value, parse_options_t options) noexcept{ + // if std::float32_t is defined, and we are in C++23 mode; macro set for float32; + // set value to float due to equivalence between float and float32_t + float val; + auto ret = from_chars_advanced(first, last, val, options); + value = val; + return ret; + } +}; +#endif + +#if __STDCPP_FLOAT64_T__ == 1 +template <> +struct from_chars_caller +{ + template + FASTFLOAT_CONSTEXPR20 + static from_chars_result_t call(UC const * first, UC const * last, + std::float64_t &value, parse_options_t options) noexcept{ + // if std::float64_t is defined, and we are in C++23 mode; macro set for float64; + // set value as double due to equivalence between double and float64_t + double val; + auto ret = from_chars_advanced(first, last, val, options); + value = val; + return ret; + } +}; +#endif + + +template +FASTFLOAT_CONSTEXPR20 +from_chars_result_t from_chars(UC const * first, UC const * last, T &value, chars_format fmt /*= chars_format::general*/) noexcept { - return from_chars_advanced(first, last, value, parse_options{fmt}); + return from_chars_caller::call(first, last, value, parse_options_t(fmt)); } -template -from_chars_result from_chars_advanced(const char *first, const char *last, - T &value, parse_options options) noexcept { +/** + * This function overload takes parsed_number_string_t structure that is created and populated + * either by from_chars_advanced function taking chars range and parsing options + * or other parsing custom function implemented by user. 
+ */ +template +FASTFLOAT_CONSTEXPR20 +from_chars_result_t from_chars_advanced(parsed_number_string_t& pns, + T &value) noexcept { - static_assert (std::is_same::value || std::is_same::value, "only float and double are supported"); + static_assert (is_supported_float_type(), "only some floating-point types are supported"); + static_assert (is_supported_char_type(), "only char, wchar_t, char16_t and char32_t are supported"); + from_chars_result_t answer; - from_chars_result answer; - if (first == last) { - answer.ec = std::errc::invalid_argument; - answer.ptr = first; - return answer; - } - parsed_number_string pns = parse_number_string(first, last, options); - if (!pns.valid) { - return detail::parse_infnan(first, last, value); - } answer.ec = std::errc(); // be optimistic answer.ptr = pns.lastmatch; - // Next is Clinger's fast path. - if (binary_format::min_exponent_fast_path() <= pns.exponent && pns.exponent <= binary_format::max_exponent_fast_path() && pns.mantissa <=binary_format::max_mantissa_fast_path() && !pns.too_many_digits) { - value = T(pns.mantissa); - if (pns.exponent < 0) { value = value / binary_format::exact_power_of_ten(-pns.exponent); } - else { value = value * binary_format::exact_power_of_ten(pns.exponent); } - if (pns.negative) { value = -value; } - return answer; + // The implementation of the Clinger's fast path is convoluted because + // we want round-to-nearest in all cases, irrespective of the rounding mode + // selected on the thread. + // We proceed optimistically, assuming that detail::rounds_to_nearest() returns + // true. + if (binary_format::min_exponent_fast_path() <= pns.exponent && pns.exponent <= binary_format::max_exponent_fast_path() && !pns.too_many_digits) { + // Unfortunately, the conventional Clinger's fast path is only possible + // when the system rounds to the nearest float. + // + // We expect the next branch to almost always be selected. 
+ // We could check it first (before the previous branch), but + // there might be performance advantages at having the check + // be last. + if(!cpp20_and_in_constexpr() && detail::rounds_to_nearest()) { + // We have that fegetround() == FE_TONEAREST. + // Next is Clinger's fast path. + if (pns.mantissa <=binary_format::max_mantissa_fast_path()) { + value = T(pns.mantissa); + if (pns.exponent < 0) { value = value / binary_format::exact_power_of_ten(-pns.exponent); } + else { value = value * binary_format::exact_power_of_ten(pns.exponent); } + if (pns.negative) { value = -value; } + return answer; + } + } else { + // We do not have that fegetround() == FE_TONEAREST. + // Next is a modified Clinger's fast path, inspired by Jakub Jelínek's proposal + if (pns.exponent >= 0 && pns.mantissa <=binary_format::max_mantissa_fast_path(pns.exponent)) { +#if defined(__clang__) || defined(FASTFLOAT_32BIT) + // Clang may map 0 to -0.0 when fegetround() == FE_DOWNWARD + if(pns.mantissa == 0) { + value = pns.negative ? T(-0.) : T(0.); + return answer; + } +#endif + value = T(pns.mantissa) * binary_format::exact_power_of_ten(pns.exponent); + if (pns.negative) { value = -value; } + return answer; + } + } } adjusted_mantissa am = compute_float>(pns.exponent, pns.mantissa); if(pns.too_many_digits && am.power2 >= 0) { @@ -10349,9 +11239,67 @@ from_chars_result from_chars_advanced(const char *first, const char *last, // then we need to go the long way around again. This is very uncommon. if(am.power2 < 0) { am = digit_comp(pns, am); } to_float(pns.negative, am, value); + // Test for over/underflow. 
+ if ((pns.mantissa != 0 && am.mantissa == 0 && am.power2 == 0) || am.power2 == binary_format::infinite_power()) { + answer.ec = std::errc::result_out_of_range; + } return answer; } +template +FASTFLOAT_CONSTEXPR20 +from_chars_result_t from_chars_advanced(UC const * first, UC const * last, + T &value, parse_options_t options) noexcept { + + static_assert (is_supported_float_type(), "only some floating-point types are supported"); + static_assert (is_supported_char_type(), "only char, wchar_t, char16_t and char32_t are supported"); + + from_chars_result_t answer; +#ifdef FASTFLOAT_SKIP_WHITE_SPACE // disabled by default + while ((first != last) && fast_float::is_space(uint8_t(*first))) { + first++; + } +#endif + if (first == last) { + answer.ec = std::errc::invalid_argument; + answer.ptr = first; + return answer; + } + parsed_number_string_t pns = parse_number_string(first, last, options); + if (!pns.valid) { + if (options.format & chars_format::no_infnan) { + answer.ec = std::errc::invalid_argument; + answer.ptr = first; + return answer; + } else { + return detail::parse_infnan(first, last, value); + } + } + + // call overload that takes parsed_number_string_t directly. 
+ return from_chars_advanced(pns, value); +} + + +template +FASTFLOAT_CONSTEXPR20 +from_chars_result_t from_chars(UC const* first, UC const* last, T& value, int base) noexcept { + static_assert (is_supported_char_type(), "only char, wchar_t, char16_t and char32_t are supported"); + + from_chars_result_t answer; +#ifdef FASTFLOAT_SKIP_WHITE_SPACE // disabled by default + while ((first != last) && fast_float::is_space(uint8_t(*first))) { + first++; + } +#endif + if (first == last || base < 2 || base > 36) { + answer.ec = std::errc::invalid_argument; + answer.ptr = first; + return answer; + } + return parse_int_string(first, last, value, base); +} + } // namespace fast_float #endif @@ -10394,7 +11342,13 @@ __pragma(warning(disable : 4643)) #endif namespace std { template class allocator; +#ifdef _GLIBCXX_DEBUG +inline namespace __debug { +template class vector; +} +#else template class vector; +#endif } // namespace std #if defined(_MSC_VER) __pragma(warning(pop)) @@ -10484,12 +11438,21 @@ template bool from_chars(c4::csubstr buf, std::vector #elif defined(_LIBCPP_VERSION) || defined(__APPLE_CC__) #include // use the fwd header in stdlibc++ #elif defined(_MSC_VER) +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/error.hpp +//#include "c4/error.hpp" +#if !defined(C4_ERROR_HPP_) && !defined(_C4_ERROR_HPP_) +#error "amalgamate: file c4/error.hpp must have been included at this point" +#endif /* C4_ERROR_HPP_ */ + //! @todo is there a fwd header in msvc? namespace std { +C4_SUPPRESS_WARNING_MSVC_WITH_PUSH(4643) // Forward declaring 'char_traits' in namespace std is not permitted by the C++ Standard. 
template struct char_traits; template class allocator; template class basic_string; using string = basic_string, allocator>; +C4_SUPPRESS_WARNING_MSVC_POP } /* namespace std */ #else #error "unknown standard library" @@ -10497,8 +11460,8 @@ using string = basic_string, allocator>; namespace c4 { -C4_ALWAYS_INLINE c4::substr to_substr(std::string &s) noexcept; -C4_ALWAYS_INLINE c4::csubstr to_csubstr(std::string const& s) noexcept; +c4::substr to_substr(std::string &s) noexcept; +c4::csubstr to_csubstr(std::string const& s) noexcept; bool operator== (c4::csubstr ss, std::string const& s); bool operator!= (c4::csubstr ss, std::string const& s); @@ -10560,6 +11523,13 @@ bool from_chars(c4::csubstr buf, std::string * s); // (end https://github.com/biojppm/c4core/src/c4/std/std_fwd.hpp) +// (amalgamate) this include is needed to work around +// conditional includes in charconv.hpp +#if (defined(_MSVC_LANG) && (_MSVC_LANG >= 201703L)) || (__cplusplus >= 201703L) +#include +#endif + + //******************************************************************************** @@ -10574,39 +11544,6 @@ bool from_chars(c4::csubstr buf, std::string * s); /** @file charconv.hpp Lightweight generic type-safe wrappers for * converting individual values to/from strings. - * - * These are the main functions: - * - * @code{.cpp} - * // Convert the given value, writing into the string. - * // The resulting string will NOT be null-terminated. - * // Return the number of characters needed. - * // This function is safe to call when the string is too small - - * // no writes will occur beyond the string's last character. - * template size_t c4::to_chars(substr buf, T const& C4_RESTRICT val); - * - * - * // Convert the given value to a string using to_chars(), and - * // return the resulting string, up to and including the last - * // written character. 
- * template substr c4::to_chars_sub(substr buf, T const& C4_RESTRICT val); - * - * - * // Read a value from the string, which must be - * // trimmed to the value (ie, no leading/trailing whitespace). - * // return true if the conversion succeeded. - * // There is no check for overflow; the value wraps around in a way similar - * // to the standard C/C++ overflow behavior. For example, - * // from_chars("128", &val) returns true and val will be - * // set tot 0. - * template bool c4::from_chars(csubstr buf, T * C4_RESTRICT val); - * - * - * // Read the first valid sequence of characters from the string, - * // skipping leading whitespace, and convert it using from_chars(). - * // Return the number of characters read for converting. - * template size_t c4::from_chars_first(csubstr buf, T * C4_RESTRICT val); - * @endcode */ // amalgamate: removed include of @@ -10667,7 +11604,8 @@ bool from_chars(c4::csubstr buf, std::string * s); # if (C4_CPP >= 17) # if defined(_MSC_VER) # if (C4_MSVC_VERSION >= C4_MSVC_VERSION_2019) // VS2017 and lower do not have these macros -# include +//included above: +//# include # define C4CORE_HAVE_STD_TOCHARS 1 # define C4CORE_HAVE_STD_FROMCHARS 0 // prefer fast_float with MSVC # define C4CORE_HAVE_FAST_FLOAT 1 @@ -10703,7 +11641,7 @@ bool from_chars(c4::csubstr buf, std::string * s); # if C4CORE_HAVE_FAST_FLOAT C4_SUPPRESS_WARNING_GCC_WITH_PUSH("-Wsign-conversion") C4_SUPPRESS_WARNING_GCC("-Warray-bounds") -# if __GNUC__ >= 5 +# if defined(__GNUC__) && __GNUC__ >= 5 C4_SUPPRESS_WARNING_GCC("-Wshift-count-overflow") # endif // amalgamate: removed include of @@ -10755,27 +11693,88 @@ bool from_chars(c4::csubstr buf, std::string * s); #endif -#ifdef _MSC_VER +#if defined(_MSC_VER) # pragma warning(push) +# pragma warning(disable: 4996) // snprintf/scanf: this function or variable may be unsafe # if C4_MSVC_VERSION != C4_MSVC_VERSION_2017 # pragma warning(disable: 4800) //'int': forcing value to bool 'true' or 'false' (performance warning) # 
endif -# pragma warning(disable: 4996) // snprintf/scanf: this function or variable may be unsafe -#elif defined(__clang__) +#endif + +#if defined(__clang__) # pragma clang diagnostic push # pragma clang diagnostic ignored "-Wtautological-constant-out-of-range-compare" # pragma clang diagnostic ignored "-Wformat-nonliteral" # pragma clang diagnostic ignored "-Wdouble-promotion" // implicit conversion increases floating-point precision +# pragma clang diagnostic ignored "-Wold-style-cast" #elif defined(__GNUC__) # pragma GCC diagnostic push # pragma GCC diagnostic ignored "-Wformat-nonliteral" # pragma GCC diagnostic ignored "-Wdouble-promotion" // implicit conversion increases floating-point precision # pragma GCC diagnostic ignored "-Wuseless-cast" +# pragma GCC diagnostic ignored "-Wold-style-cast" +#endif + +#if defined(__clang__) +#define C4_NO_UBSAN_IOVRFLW __attribute__((no_sanitize("signed-integer-overflow"))) +#elif defined(__GNUC__) +#if __GNUC__ > 7 +#define C4_NO_UBSAN_IOVRFLW __attribute__((no_sanitize("signed-integer-overflow"))) +#else +#define C4_NO_UBSAN_IOVRFLW +#endif +#else +#define C4_NO_UBSAN_IOVRFLW #endif namespace c4 { +/** @defgroup doc_charconv Charconv utilities + * + * Lightweight, very fast generic type-safe wrappers for converting + * individual values to/from strings. 
These are the main generic + * functions: + * - @ref doc_to_chars and its alias @ref doc_xtoa: implemented by calling @ref itoa()/@ref utoa()/@ref ftoa()/@ref dtoa() (or generically @ref xtoa()) + * - @ref doc_from_chars and its alias @ref doc_atox: implemented by calling @ref atoi()/@ref atou()/@ref atof()/@ref atod() (or generically @ref atox()) + * - @ref to_chars_sub() + * - @ref from_chars_first() + * - @ref xtoa()/@ref atox() are implemented in terms @ref write_dec()/@ref read_dec() et al (see @ref doc_write/@ref doc_read()) + * + * And also some modest brag is in order: these functions are really + * fast: faster even than C++17 `std::to_chars()` and + * `std::to_chars()`, and many dozens of times faster than the + * iostream abominations. + * + * For example, here are some benchmark comparisons for @ref + * doc_from_chars (link leads to the main project README, where these + * results are shown more systematically). + * + * + * + *
atox,int64_t
g++12, linux Visual Studio 2019 + *
\image html linux-x86_64-gxx12.1-Release-c4core-bm-charconv-atox-mega_bytes_per_second-i64.png \image html windows-x86_64-vs2019-Release-c4core-bm-charconv-atox-mega_bytes_per_second-i64.png + *
+ * + * + * + *
xtoa,int64_t
g++12, linux Visual Studio 2019 + *
\image html linux-x86_64-gxx12.1-Release-c4core-bm-charconv-xtoa-mega_bytes_per_second-i64.png \image html windows-x86_64-vs2019-Release-c4core-bm-charconv-xtoa-mega_bytes_per_second-i64.png + *
+ * + * To parse floating point, c4core uses + * [fastfloat](https://github.com/fastfloat/fast_float), which is + * extremely fast, by an even larger factor: + * + * + * + *
atox,float
g++12, linux Visual Studio 2019 + *
\image html linux-x86_64-gxx12.1-Release-c4core-bm-charconv-atof-mega_bytes_per_second-float.png \image html windows-x86_64-vs2019-Release-c4core-bm-charconv-atof-mega_bytes_per_second-float.png + *
+ * + * @{ + */ + #if C4CORE_HAVE_STD_TOCHARS /** @warning Use only the symbol. Do not rely on the type or naked value of this enum. */ typedef enum : std::underlying_type::type { @@ -10802,7 +11801,7 @@ typedef enum : char { } RealFormat_e; #endif - +/** @cond dev */ /** in some platforms, int,unsigned int * are not any of int8_t...int64_t and * long,unsigned long are not any of uint8_t...uint64_t */ @@ -10826,6 +11825,7 @@ struct is_fixed_length value = value_i || value_u }; }; +/** @endcond */ //----------------------------------------------------------------------------- @@ -10844,6 +11844,7 @@ struct is_fixed_length # endif #endif +/** @cond dev */ namespace detail { /* python command to get the values below: @@ -11003,25 +12004,30 @@ template<> struct charconv_digits_<8u, false> }; } // namespace detail +// Helper macros, undefined below +#define _c4append(c) { if(C4_LIKELY(pos < buf.len)) { buf.str[pos++] = static_cast(c); } else { ++pos; } } +#define _c4appendhex(i) { if(C4_LIKELY(pos < buf.len)) { buf.str[pos++] = hexchars[i]; } else { ++pos; } } + +/** @endcond */ + //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- -// Helper macros, undefined below -#define _c4append(c) { if(C4_LIKELY(pos < buf.len)) { buf.str[pos++] = static_cast(c); } else { ++pos; } } -#define _c4appendhex(i) { if(C4_LIKELY(pos < buf.len)) { buf.str[pos++] = hexchars[i]; } else { ++pos; } } -/** @name digits_dec return the number of digits required to encode a - * decimal number. +/** @defgroup doc_digits Get number of digits * * @note At first sight this code may look heavily branchy and * therefore inefficient. However, measurements revealed this to be * the fastest among the alternatives. 
* - * @see https://github.com/biojppm/c4core/pull/77 */ -/** @{ */ + * @see https://github.com/biojppm/c4core/pull/77 + * + * @{ + */ +/** decimal digits for 8 bit integers */ template C4_CONSTEXPR14 C4_ALWAYS_INLINE auto digits_dec(T v) noexcept @@ -11032,6 +12038,7 @@ auto digits_dec(T v) noexcept return ((v >= 100) ? 3u : ((v >= 10) ? 2u : 1u)); } +/** decimal digits for 16 bit integers */ template C4_CONSTEXPR14 C4_ALWAYS_INLINE auto digits_dec(T v) noexcept @@ -11042,6 +12049,7 @@ auto digits_dec(T v) noexcept return ((v >= 10000) ? 5u : (v >= 1000) ? 4u : (v >= 100) ? 3u : (v >= 10) ? 2u : 1u); } +/** decimal digits for 32 bit integers */ template C4_CONSTEXPR14 C4_ALWAYS_INLINE auto digits_dec(T v) noexcept @@ -11054,6 +12062,7 @@ auto digits_dec(T v) noexcept (v >= 1000) ? 4u : (v >= 100) ? 3u : (v >= 10) ? 2u : 1u); } +/** decimal digits for 64 bit integers */ template C4_CONSTEXPR14 C4_ALWAYS_INLINE auto digits_dec(T v) noexcept @@ -11101,9 +12110,8 @@ auto digits_dec(T v) noexcept return (v >= 10) ? 2u : 1u; } -/** @} */ - +/** return the number of digits required to encode an hexadecimal number. */ template C4_CONSTEXPR14 C4_ALWAYS_INLINE unsigned digits_hex(T v) noexcept { @@ -11112,6 +12120,7 @@ C4_CONSTEXPR14 C4_ALWAYS_INLINE unsigned digits_hex(T v) noexcept return v ? 1u + (msb((typename std::make_unsigned::type)v) >> 2u) : 1u; } +/** return the number of digits required to encode a binary number. */ template C4_CONSTEXPR14 C4_ALWAYS_INLINE unsigned digits_bin(T v) noexcept { @@ -11120,6 +12129,7 @@ C4_CONSTEXPR14 C4_ALWAYS_INLINE unsigned digits_bin(T v) noexcept return v ? 1u + msb((typename std::make_unsigned::type)v) : 1u; } +/** return the number of digits required to encode an octal number. 
*/ template C4_CONSTEXPR14 C4_ALWAYS_INLINE unsigned digits_oct(T v_) noexcept { @@ -11150,11 +12160,14 @@ C4_CONSTEXPR14 C4_ALWAYS_INLINE unsigned digits_oct(T v_) noexcept } } +/** @} */ + //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- +/** @cond dev */ namespace detail { C4_INLINE_CONSTEXPR const char hexchars[] = "0123456789abcdef"; C4_INLINE_CONSTEXPR const char digits0099[] = @@ -11164,6 +12177,7 @@ C4_INLINE_CONSTEXPR const char digits0099[] = "6061626364656667686970717273747576777879" "8081828384858687888990919293949596979899"; } // namespace detail +/** @endcond */ C4_SUPPRESS_WARNING_GCC_PUSH C4_SUPPRESS_WARNING_GCC("-Warray-bounds") // gcc has false positives here @@ -11171,6 +12185,16 @@ C4_SUPPRESS_WARNING_GCC("-Warray-bounds") // gcc has false positives here C4_SUPPRESS_WARNING_GCC("-Wstringop-overflow") // gcc has false positives here #endif +/** @defgroup doc_write_unchecked Write with known number of digits + * + * Writes a value without checking the buffer length with regards to + * the required number of digits to encode the value. It is the + * responsibility of the caller to ensure that the provided number of + * digits is enough to write the given value. Notwithstanding the + * name, assertions are liberally performed, so this code is safe. 
+ * + * @{ */ + template C4_HOT C4_ALWAYS_INLINE void write_dec_unchecked(substr buf, T v, unsigned digits_v) noexcept @@ -11182,7 +12206,8 @@ void write_dec_unchecked(substr buf, T v, unsigned digits_v) noexcept // in bm_xtoa: checkoncelog_singlediv_write2 while(v >= T(100)) { - const T quo = v / T(100); + T quo = v; + quo /= T(100); const auto num = (v - quo * T(100)) << 1u; v = quo; buf.str[--digits_v] = detail::digits0099[num + 1]; @@ -11250,6 +12275,19 @@ void write_bin_unchecked(substr buf, T v, unsigned digits_v) noexcept C4_ASSERT(digits_v == 0); } +/** @} */ // write_unchecked + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +/** @defgroup doc_write Write a value + * + * Writes a value without checking the buffer length + * decimal number -- but asserting. + * + * @{ */ /** write an integer to a string in decimal format. This is the * lowest level (and the fastest) function to do this task. 
@@ -11328,6 +12366,7 @@ C4_ALWAYS_INLINE size_t write_bin(substr buf, T v) noexcept } +/** @cond dev */ namespace detail { template using NumberWriter = size_t (*)(substr, U); template writer> @@ -11346,6 +12385,7 @@ size_t write_num_digits(substr buf, T v, size_t num_digits) noexcept return num_digits; } } // namespace detail +/** @endcond */ /** same as c4::write_dec(), but pad with zeroes on the left @@ -11384,6 +12424,8 @@ C4_ALWAYS_INLINE size_t write_oct(substr buf, T val, size_t num_digits) noexcept return detail::write_num_digits>(buf, val, num_digits); } +/** @} */ // write + C4_SUPPRESS_WARNING_GCC_POP @@ -11391,6 +12433,14 @@ C4_SUPPRESS_WARNING_GCC_POP //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- + +C4_SUPPRESS_WARNING_MSVC_PUSH +C4_SUPPRESS_WARNING_MSVC(4365) // '=': conversion from 'int' to 'I', signed/unsigned mismatch + +/** @defgroup doc_read Read a value + * + * @{ */ + /** read a decimal integer from a string. This is the * lowest level (and the fastest) function to do this task. 
* @note does not accept negative numbers @@ -11403,6 +12453,7 @@ C4_SUPPRESS_WARNING_GCC_POP * @see overflows() to find out if a number string overflows a type range * @return true if the conversion was successful (no overflow check) */ template +C4_NO_UBSAN_IOVRFLW C4_ALWAYS_INLINE bool read_dec(csubstr s, I *C4_RESTRICT v) noexcept { C4_STATIC_ASSERT(std::is_integral::value); @@ -11430,6 +12481,7 @@ C4_ALWAYS_INLINE bool read_dec(csubstr s, I *C4_RESTRICT v) noexcept * @see overflows() to find out if a number string overflows a type range * @return true if the conversion was successful (no overflow check) */ template +C4_NO_UBSAN_IOVRFLW C4_ALWAYS_INLINE bool read_hex(csubstr s, I *C4_RESTRICT v) noexcept { C4_STATIC_ASSERT(std::is_integral::value); @@ -11464,6 +12516,7 @@ C4_ALWAYS_INLINE bool read_hex(csubstr s, I *C4_RESTRICT v) noexcept * @see overflows() to find out if a number string overflows a type range * @return true if the conversion was successful (no overflow check) */ template +C4_NO_UBSAN_IOVRFLW C4_ALWAYS_INLINE bool read_bin(csubstr s, I *C4_RESTRICT v) noexcept { C4_STATIC_ASSERT(std::is_integral::value); @@ -11493,6 +12546,7 @@ C4_ALWAYS_INLINE bool read_bin(csubstr s, I *C4_RESTRICT v) noexcept * @see overflows() to find out if a number string overflows a type range * @return true if the conversion was successful (no overflow check) */ template +C4_NO_UBSAN_IOVRFLW C4_ALWAYS_INLINE bool read_oct(csubstr s, I *C4_RESTRICT v) noexcept { C4_STATIC_ASSERT(std::is_integral::value); @@ -11507,11 +12561,18 @@ C4_ALWAYS_INLINE bool read_oct(csubstr s, I *C4_RESTRICT v) noexcept return true; } +/** @} */ + +C4_SUPPRESS_WARNING_MSVC_POP + //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- +C4_SUPPRESS_WARNING_GCC_WITH_PUSH("-Wswitch-default") + +/** @cond dev */ 
namespace detail { inline size_t _itoa2buf(substr buf, size_t pos, csubstr val) noexcept { @@ -11600,7 +12661,7 @@ C4_NO_INLINE size_t _itoa2buf(substr buf, I radix, size_t num_digits) noexcept buf.str[pos++] = 'x'; pos = _itoa2bufwithdigits(buf, pos, num_digits, digits_type::min_value_hex()); break; - case I( 2): + case I(2): // add 3 to account for -0b needed_digits = num_digits+3 > digits_type::maxdigits_bin ? num_digits+3 : digits_type::maxdigits_bin; if(C4_UNLIKELY(buf.len < needed_digits)) @@ -11610,7 +12671,7 @@ C4_NO_INLINE size_t _itoa2buf(substr buf, I radix, size_t num_digits) noexcept buf.str[pos++] = 'b'; pos = _itoa2bufwithdigits(buf, pos, num_digits, digits_type::min_value_bin()); break; - case I( 8): + case I(8): // add 3 to account for -0o needed_digits = num_digits+3 > digits_type::maxdigits_oct ? num_digits+3 : digits_type::maxdigits_oct; if(C4_UNLIKELY(buf.len < needed_digits)) @@ -11624,7 +12685,12 @@ C4_NO_INLINE size_t _itoa2buf(substr buf, I radix, size_t num_digits) noexcept return pos; } } // namespace detail +/** @endcond */ + +/** @defgroup doc_itoa itoa: signed to chars + * + * @{ */ /** convert an integral signed decimal to a string. * @note the resulting string is NOT zero-terminated. @@ -11809,11 +12875,17 @@ C4_ALWAYS_INLINE size_t itoa(substr buf, T v, T radix, size_t num_digits) noexce return detail::_itoa2buf(buf, radix, num_digits); } +/** @} */ + //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- +/** @defgroup doc_utoa utoa: unsigned to chars + * + * @{ */ + /** convert an integral unsigned decimal to a string. * * @note the resulting string is NOT zero-terminated. 
@@ -11937,12 +13009,19 @@ C4_ALWAYS_INLINE size_t utoa(substr buf, T v, T radix, size_t num_digits) noexce } return total_digits; } +C4_SUPPRESS_WARNING_GCC_POP + +/** @} */ //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- +/** @defgroup doc_atoi atoi: chars to signed + * + * @{ */ + /** Convert a trimmed string to a signed integral value. The input * string can be formatted as decimal, binary (prefix 0b or 0B), octal * (prefix 0o or 0O) or hexadecimal (prefix 0x or 0X). Strings with @@ -11953,16 +13032,18 @@ C4_ALWAYS_INLINE size_t utoa(substr buf, T v, T radix, size_t num_digits) noexce * * @return true if the conversion was successful. * - * @note overflow is not detected: the return status is true even if - * the conversion would return a value outside of the type's range, in - * which case the result will wrap around the type's range. - * This is similar to native behavior. - * * @note a positive sign is not accepted. ie, the string must not * start with '+' * + * @note overflow is not detected: the return status is true even if + * the conversion would return a value outside of the type's range, in + * which case the result will wrap around the type's range. This is + * similar to native behavior. See @ref doc_overflows and @ref + * doc_overflow_checked for overflow checking utilities. + * * @see atoi_first() if the string is not trimmed to the value to read. 
*/ template +C4_NO_UBSAN_IOVRFLW C4_ALWAYS_INLINE bool atoi(csubstr str, T * C4_RESTRICT v) noexcept { C4_STATIC_ASSERT(std::is_integral::value); @@ -12027,8 +13108,16 @@ C4_ALWAYS_INLINE size_t atoi_first(csubstr str, T * C4_RESTRICT v) return csubstr::npos; } +/** @} */ + //----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +/** @defgroup doc_atou atou: chars to unsigned + * + * @{ */ /** Convert a trimmed string to an unsigned integral value. The string can be * formatted as decimal, binary (prefix 0b or 0B), octal (prefix 0o or 0O) @@ -12039,7 +13128,9 @@ C4_ALWAYS_INLINE size_t atoi_first(csubstr str, T * C4_RESTRICT v) * * @note overflow is not detected: the return status is true even if * the conversion would return a value outside of the type's range, in - * which case the result will wrap around the type's range. + * which case the result will wrap around the type's range. See @ref + * doc_overflows and @ref doc_overflow_checked for overflow checking + * utilities. * * @note If the string has a minus character, the return status * will be false. 
@@ -12099,6 +13190,8 @@ C4_ALWAYS_INLINE size_t atou_first(csubstr str, T *v) } +/** @} */ + #ifdef _MSC_VER # pragma warning(pop) #elif defined(__clang__) @@ -12111,6 +13204,8 @@ C4_ALWAYS_INLINE size_t atou_first(csubstr str, T *v) //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- + +/** @cond dev */ namespace detail { inline bool check_overflow(csubstr str, csubstr limit) noexcept { @@ -12129,15 +13224,22 @@ inline bool check_overflow(csubstr str, csubstr limit) noexcept return str.len > limit.len; } } // namespace detail +/** @endcond */ -/** Test if the following string would overflow when converted to associated - * types. +/** @defgroup doc_overflows overflows: does a number string overflow a type + * + * @{ */ + +/** Test if the following string would overflow when converted to + * associated integral types; this function is dispatched with SFINAE + * to handle differently signed and unsigned types. * @return true if number will overflow, false if it fits (or doesn't parse) + * @see doc_overflow_checked for format specifiers to enforce no-overflow reads */ template auto overflows(csubstr str) noexcept - -> typename std::enable_if::value, bool>::type + -> typename std::enable_if::value, bool>::type { C4_STATIC_ASSERT(std::is_integral::value); @@ -12195,13 +13297,16 @@ auto overflows(csubstr str) noexcept } -/** Test if the following string would overflow when converted to associated - * types. +/** Test if the following string would overflow when converted to + * associated integral types; this function is dispatched with SFINAE + * to handle differently signed and unsigned types. 
+ * * @return true if number will overflow, false if it fits (or doesn't parse) + * @see doc_overflow_checked for format specifiers to enforce no-overflow reads */ template auto overflows(csubstr str) - -> typename std::enable_if::value, bool>::type + -> typename std::enable_if::value, bool>::type { C4_STATIC_ASSERT(std::is_integral::value); if(C4_UNLIKELY(str.len == 0)) @@ -12294,11 +13399,14 @@ auto overflows(csubstr str) return detail::check_overflow(str, detail::charconv_digits::max_value_dec()); } +/** @} */ + //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- +/** @cond dev */ namespace detail { @@ -12523,12 +13631,17 @@ C4_ALWAYS_INLINE bool scan_rhex(csubstr s, T *C4_RESTRICT val) noexcept #endif } // namespace detail +/** @endcond */ #undef _c4appendhex #undef _c4append +/** @defgroup doc_ftoa ftoa: float32 to chars + * + * @{ */ + /** Convert a single-precision real number to string. The string will * in general be NOT null-terminated. For FTOA_FLEX, \p precision is * the number of significand digits. Otherwise \p precision is the @@ -12548,6 +13661,12 @@ C4_ALWAYS_INLINE size_t ftoa(substr str, float v, int precision=-1, RealFormat_e #endif } +/** @} */ + + +/** @defgroup doc_dtoa dtoa: float64 to chars + * + * @{ */ /** Convert a double-precision real number to string. The string will * in general be NOT null-terminated. For FTOA_FLEX, \p precision is @@ -12568,6 +13687,12 @@ C4_ALWAYS_INLINE size_t dtoa(substr str, double v, int precision=-1, RealFormat_ #endif } +/** @} */ + + +/** @defgroup doc_atof atof: chars to float32 + * + * @{ */ /** Convert a string to a single precision real number. 
* The input string must be trimmed to the value, ie @@ -12609,6 +13734,27 @@ C4_ALWAYS_INLINE bool atof(csubstr str, float * C4_RESTRICT v) noexcept } +/** Convert a string to a single precision real number. + * Leading whitespace is skipped until valid characters are found. + * @return the number of characters read from the string, or npos if + * conversion was not successful or if the string was empty */ +inline size_t atof_first(csubstr str, float * C4_RESTRICT v) noexcept +{ + csubstr trimmed = str.first_real_span(); + if(trimmed.len == 0) + return csubstr::npos; + if(atof(trimmed, v)) + return static_cast(trimmed.end() - str.begin()); + return csubstr::npos; +} + +/** @} */ + + +/** @defgroup doc_atod atod: chars to float64 + * + * @{ */ + /** Convert a string to a double precision real number. * The input string must be trimmed to the value, ie * no leading or trailing whitespace can be present. @@ -12617,6 +13763,7 @@ C4_ALWAYS_INLINE bool atof(csubstr str, float * C4_RESTRICT v) noexcept */ C4_ALWAYS_INLINE bool atod(csubstr str, double * C4_RESTRICT v) noexcept { + C4_ASSERT(str.len > 0); C4_ASSERT(str.triml(" \r\t\n").len == str.len); #if C4CORE_HAVE_FAST_FLOAT // fastfloat cannot parse hexadecimal floats @@ -12648,21 +13795,6 @@ C4_ALWAYS_INLINE bool atod(csubstr str, double * C4_RESTRICT v) noexcept } -/** Convert a string to a single precision real number. - * Leading whitespace is skipped until valid characters are found. - * @return the number of characters read from the string, or npos if - * conversion was not successful or if the string was empty */ -inline size_t atof_first(csubstr str, float * C4_RESTRICT v) noexcept -{ - csubstr trimmed = str.first_real_span(); - if(trimmed.len == 0) - return csubstr::npos; - if(atof(trimmed, v)) - return static_cast(trimmed.end() - str.begin()); - return csubstr::npos; -} - - /** Convert a string to a double precision real number. * Leading whitespace is skipped until valid characters are found. 
* @return the number of characters read from the string, or npos if @@ -12677,12 +13809,28 @@ inline size_t atod_first(csubstr str, double * C4_RESTRICT v) noexcept return csubstr::npos; } +/** @} */ + //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- // generic versions +/** @cond dev */ +// on some platforms, (unsigned) int and (unsigned) long +// are not any of the fixed length types above +#define _C4_IF_NOT_FIXED_LENGTH_I(T, ty) C4_ALWAYS_INLINE typename std::enable_if::value && !is_fixed_length::value_i, ty> +#define _C4_IF_NOT_FIXED_LENGTH_U(T, ty) C4_ALWAYS_INLINE typename std::enable_if::value && !is_fixed_length::value_u, ty> +/** @endcond*/ + + +/** @defgroup doc_xtoa xtoa: generic value to chars + * + * Dispatches to the most appropriate and efficient conversion + * function + * + * @{ */ C4_ALWAYS_INLINE size_t xtoa(substr s, uint8_t v) noexcept { return write_dec(s, v); } C4_ALWAYS_INLINE size_t xtoa(substr s, uint16_t v) noexcept { return write_dec(s, v); } C4_ALWAYS_INLINE size_t xtoa(substr s, uint32_t v) noexcept { return write_dec(s, v); } @@ -12715,6 +13863,20 @@ C4_ALWAYS_INLINE size_t xtoa(substr s, int64_t v, int64_t radix, size_t num_di C4_ALWAYS_INLINE size_t xtoa(substr s, float v, int precision, RealFormat_e formatting=FTOA_FLEX) noexcept { return ftoa(s, v, precision, formatting); } C4_ALWAYS_INLINE size_t xtoa(substr s, double v, int precision, RealFormat_e formatting=FTOA_FLEX) noexcept { return dtoa(s, v, precision, formatting); } +template _C4_IF_NOT_FIXED_LENGTH_I(T, size_t)::type xtoa(substr buf, T v) noexcept { return itoa(buf, v); } +template _C4_IF_NOT_FIXED_LENGTH_U(T, size_t)::type xtoa(substr buf, T v) noexcept { return write_dec(buf, v); } +template +C4_ALWAYS_INLINE size_t xtoa(substr s, T *v) noexcept { return itoa(s, (intptr_t)v, 
(intptr_t)16); } + +/** @} */ + +/** @defgroup doc_atox atox: generic chars to value + * + * Dispatches to the most appropriate and efficient conversion + * function + * + * @{ */ + C4_ALWAYS_INLINE bool atox(csubstr s, uint8_t *C4_RESTRICT v) noexcept { return atou(s, v); } C4_ALWAYS_INLINE bool atox(csubstr s, uint16_t *C4_RESTRICT v) noexcept { return atou(s, v); } C4_ALWAYS_INLINE bool atox(csubstr s, uint32_t *C4_RESTRICT v) noexcept { return atou(s, v); } @@ -12726,6 +13888,35 @@ C4_ALWAYS_INLINE bool atox(csubstr s, int64_t *C4_RESTRICT v) noexcept { return C4_ALWAYS_INLINE bool atox(csubstr s, float *C4_RESTRICT v) noexcept { return atof(s, v); } C4_ALWAYS_INLINE bool atox(csubstr s, double *C4_RESTRICT v) noexcept { return atod(s, v); } +template _C4_IF_NOT_FIXED_LENGTH_I(T, bool )::type atox(csubstr buf, T *C4_RESTRICT v) noexcept { return atoi(buf, v); } +template _C4_IF_NOT_FIXED_LENGTH_U(T, bool )::type atox(csubstr buf, T *C4_RESTRICT v) noexcept { return atou(buf, v); } +template +C4_ALWAYS_INLINE bool atox(csubstr s, T **v) noexcept { intptr_t tmp; bool ret = atox(s, &tmp); if(ret) { *v = (T*)tmp; } return ret; } + +/** @} */ + + +/** @defgroup doc_to_chars to_chars: generalized value to chars + * + * Convert the given value, writing into the string. The resulting + * string will NOT be null-terminated. Return the number of + * characters needed. This function is safe to call when the string + * is too small - no writes will occur beyond the string's last + * character. + * + * Dispatches to the most appropriate and efficient conversion + * function. + * + * @see write_dec, doc_utoa, doc_itoa, doc_ftoa, doc_dtoa + * + * @warning When serializing floating point values (float or double), + * be aware that because it uses defaults, to_chars() may cause a + * truncation of the precision. To enforce a particular precision, use + * for example @ref c4::fmt::real, or call directly @ref c4::ftoa or + * @ref c4::dtoa.
+ * + * @{ */ + C4_ALWAYS_INLINE size_t to_chars(substr buf, uint8_t v) noexcept { return write_dec(buf, v); } C4_ALWAYS_INLINE size_t to_chars(substr buf, uint16_t v) noexcept { return write_dec(buf, v); } C4_ALWAYS_INLINE size_t to_chars(substr buf, uint32_t v) noexcept { return write_dec(buf, v); } @@ -12737,17 +13928,58 @@ C4_ALWAYS_INLINE size_t to_chars(substr buf, int64_t v) noexcept { return itoa( C4_ALWAYS_INLINE size_t to_chars(substr buf, float v) noexcept { return ftoa(buf, v); } C4_ALWAYS_INLINE size_t to_chars(substr buf, double v) noexcept { return dtoa(buf, v); } -C4_ALWAYS_INLINE bool from_chars(csubstr buf, uint8_t *C4_RESTRICT v) noexcept { return atou(buf, v); } -C4_ALWAYS_INLINE bool from_chars(csubstr buf, uint16_t *C4_RESTRICT v) noexcept { return atou(buf, v); } -C4_ALWAYS_INLINE bool from_chars(csubstr buf, uint32_t *C4_RESTRICT v) noexcept { return atou(buf, v); } -C4_ALWAYS_INLINE bool from_chars(csubstr buf, uint64_t *C4_RESTRICT v) noexcept { return atou(buf, v); } -C4_ALWAYS_INLINE bool from_chars(csubstr buf, int8_t *C4_RESTRICT v) noexcept { return atoi(buf, v); } -C4_ALWAYS_INLINE bool from_chars(csubstr buf, int16_t *C4_RESTRICT v) noexcept { return atoi(buf, v); } -C4_ALWAYS_INLINE bool from_chars(csubstr buf, int32_t *C4_RESTRICT v) noexcept { return atoi(buf, v); } +template _C4_IF_NOT_FIXED_LENGTH_I(T, size_t)::type to_chars(substr buf, T v) noexcept { return itoa(buf, v); } +template _C4_IF_NOT_FIXED_LENGTH_U(T, size_t)::type to_chars(substr buf, T v) noexcept { return write_dec(buf, v); } +template +C4_ALWAYS_INLINE size_t to_chars(substr s, T *v) noexcept { return itoa(s, (intptr_t)v, (intptr_t)16); } + +/** @} */ + + +/** @defgroup doc_from_chars from_chars: generalized chars to value + * + * Read a value from the string, which must be trimmed to the value + * (ie, no leading/trailing whitespace). return true if the + * conversion succeeded. 
There is no check for overflow; the value + * wraps around in a way similar to the standard C/C++ overflow + * behavior. For example, from_chars("128", &val) returns true + * and val will be set to 0. See @ref doc_overflows and @ref + * doc_overflow_checked for facilities enforcing no-overflow. + * + * Dispatches to the most appropriate and efficient conversion + * function + * + * @see doc_from_chars_first, atou, atoi, atof, atod + * @{ */ + +C4_ALWAYS_INLINE bool from_chars(csubstr buf, uint8_t *C4_RESTRICT v) noexcept { return atou(buf, v); } +C4_ALWAYS_INLINE bool from_chars(csubstr buf, uint16_t *C4_RESTRICT v) noexcept { return atou(buf, v); } +C4_ALWAYS_INLINE bool from_chars(csubstr buf, uint32_t *C4_RESTRICT v) noexcept { return atou(buf, v); } +C4_ALWAYS_INLINE bool from_chars(csubstr buf, uint64_t *C4_RESTRICT v) noexcept { return atou(buf, v); } +C4_ALWAYS_INLINE bool from_chars(csubstr buf, int8_t *C4_RESTRICT v) noexcept { return atoi(buf, v); } +C4_ALWAYS_INLINE bool from_chars(csubstr buf, int16_t *C4_RESTRICT v) noexcept { return atoi(buf, v); } +C4_ALWAYS_INLINE bool from_chars(csubstr buf, int32_t *C4_RESTRICT v) noexcept { return atoi(buf, v); } C4_ALWAYS_INLINE bool from_chars(csubstr buf, int64_t *C4_RESTRICT v) noexcept { return atoi(buf, v); } C4_ALWAYS_INLINE bool from_chars(csubstr buf, float *C4_RESTRICT v) noexcept { return atof(buf, v); } C4_ALWAYS_INLINE bool from_chars(csubstr buf, double *C4_RESTRICT v) noexcept { return atod(buf, v); } +template _C4_IF_NOT_FIXED_LENGTH_I(T, bool )::type from_chars(csubstr buf, T *C4_RESTRICT v) noexcept { return atoi(buf, v); } +template _C4_IF_NOT_FIXED_LENGTH_U(T, bool )::type from_chars(csubstr buf, T *C4_RESTRICT v) noexcept { return atou(buf, v); } +template +C4_ALWAYS_INLINE bool from_chars(csubstr buf, T **v) noexcept { intptr_t tmp; bool ret = from_chars(buf, &tmp); if(ret) { *v = (T*)tmp; } return ret; } + +/** @defgroup doc_from_chars_first from_chars_first: generalized chars to value + * 
+ * Read the first valid sequence of characters from the string, + * skipping leading whitespace, and convert it using @ref doc_from_chars . + * Return the number of characters read for converting. + * + * Dispatches to the most appropriate and efficient conversion + * function. + * + * @see atou_first, atoi_first, atof_first, atod_first + * @{ */ + C4_ALWAYS_INLINE size_t from_chars_first(csubstr buf, uint8_t *C4_RESTRICT v) noexcept { return atou_first(buf, v); } C4_ALWAYS_INLINE size_t from_chars_first(csubstr buf, uint16_t *C4_RESTRICT v) noexcept { return atou_first(buf, v); } C4_ALWAYS_INLINE size_t from_chars_first(csubstr buf, uint32_t *C4_RESTRICT v) noexcept { return atou_first(buf, v); } @@ -12759,41 +13991,17 @@ C4_ALWAYS_INLINE size_t from_chars_first(csubstr buf, int64_t *C4_RESTRICT v) n C4_ALWAYS_INLINE size_t from_chars_first(csubstr buf, float *C4_RESTRICT v) noexcept { return atof_first(buf, v); } C4_ALWAYS_INLINE size_t from_chars_first(csubstr buf, double *C4_RESTRICT v) noexcept { return atod_first(buf, v); } - -//----------------------------------------------------------------------------- -// on some platforms, (unsigned) int and (unsigned) long -// are not any of the fixed length types above - -#define _C4_IF_NOT_FIXED_LENGTH_I(T, ty) C4_ALWAYS_INLINE typename std::enable_if::value && !is_fixed_length::value_i, ty> -#define _C4_IF_NOT_FIXED_LENGTH_U(T, ty) C4_ALWAYS_INLINE typename std::enable_if::value && !is_fixed_length::value_u, ty> - -template _C4_IF_NOT_FIXED_LENGTH_I(T, size_t)::type xtoa(substr buf, T v) noexcept { return itoa(buf, v); } -template _C4_IF_NOT_FIXED_LENGTH_U(T, size_t)::type xtoa(substr buf, T v) noexcept { return write_dec(buf, v); } - -template _C4_IF_NOT_FIXED_LENGTH_I(T, bool )::type atox(csubstr buf, T *C4_RESTRICT v) noexcept { return atoi(buf, v); } -template _C4_IF_NOT_FIXED_LENGTH_U(T, bool )::type atox(csubstr buf, T *C4_RESTRICT v) noexcept { return atou(buf, v); } - -template _C4_IF_NOT_FIXED_LENGTH_I(T, 
size_t)::type to_chars(substr buf, T v) noexcept { return itoa(buf, v); } -template _C4_IF_NOT_FIXED_LENGTH_U(T, size_t)::type to_chars(substr buf, T v) noexcept { return write_dec(buf, v); } - -template _C4_IF_NOT_FIXED_LENGTH_I(T, bool )::type from_chars(csubstr buf, T *C4_RESTRICT v) noexcept { return atoi(buf, v); } -template _C4_IF_NOT_FIXED_LENGTH_U(T, bool )::type from_chars(csubstr buf, T *C4_RESTRICT v) noexcept { return atou(buf, v); } - template _C4_IF_NOT_FIXED_LENGTH_I(T, size_t)::type from_chars_first(csubstr buf, T *C4_RESTRICT v) noexcept { return atoi_first(buf, v); } template _C4_IF_NOT_FIXED_LENGTH_U(T, size_t)::type from_chars_first(csubstr buf, T *C4_RESTRICT v) noexcept { return atou_first(buf, v); } +template +C4_ALWAYS_INLINE size_t from_chars_first(csubstr buf, T **v) noexcept { intptr_t tmp; bool ret = from_chars_first(buf, &tmp); if(ret) { *v = (T*)tmp; } return ret; } -#undef _C4_IF_NOT_FIXED_LENGTH_I -#undef _C4_IF_NOT_FIXED_LENGTH_U - +/** @} */ -//----------------------------------------------------------------------------- -// for pointers +/** @} */ -template C4_ALWAYS_INLINE size_t xtoa(substr s, T *v) noexcept { return itoa(s, (intptr_t)v, (intptr_t)16); } -template C4_ALWAYS_INLINE bool atox(csubstr s, T **v) noexcept { intptr_t tmp; bool ret = atox(s, &tmp); if(ret) { *v = (T*)tmp; } return ret; } -template C4_ALWAYS_INLINE size_t to_chars(substr s, T *v) noexcept { return itoa(s, (intptr_t)v, (intptr_t)16); } -template C4_ALWAYS_INLINE bool from_chars(csubstr buf, T **v) noexcept { intptr_t tmp; bool ret = from_chars(buf, &tmp); if(ret) { *v = (T*)tmp; } return ret; } -template C4_ALWAYS_INLINE size_t from_chars_first(csubstr buf, T **v) noexcept { intptr_t tmp; bool ret = from_chars_first(buf, &tmp); if(ret) { *v = (T*)tmp; } return ret; } +#undef _C4_IF_NOT_FIXED_LENGTH_I +#undef _C4_IF_NOT_FIXED_LENGTH_U //----------------------------------------------------------------------------- @@ -12802,7 +14010,10 @@ template 
C4_ALWAYS_INLINE size_t from_chars_first(csubstr buf, T **v) /** call to_chars() and return a substr consisting of the * written portion of the input buffer. Ie, same as to_chars(), * but return a substr instead of a size_t. - * + * Convert the given value to a string using to_chars(), and + * return the resulting string, up to and including the last + * written character. + * @ingroup doc_to_chars * @see to_chars() */ template C4_ALWAYS_INLINE substr to_chars_sub(substr buf, T const& C4_RESTRICT v) noexcept @@ -12816,12 +14027,14 @@ C4_ALWAYS_INLINE substr to_chars_sub(substr buf, T const& C4_RESTRICT v) noexcep //----------------------------------------------------------------------------- // bool implementation +/** @ingroup doc_to_chars */ C4_ALWAYS_INLINE size_t to_chars(substr buf, bool v) noexcept { int val = v; return to_chars(buf, val); } +/** @ingroup doc_from_chars */ inline bool from_chars(csubstr buf, bool * C4_RESTRICT v) noexcept { if(buf == '0') @@ -12866,6 +14079,7 @@ inline bool from_chars(csubstr buf, bool * C4_RESTRICT v) noexcept return ret; } +/** @ingroup doc_from_chars_first */ inline size_t from_chars_first(csubstr buf, bool * C4_RESTRICT v) noexcept { csubstr trimmed = buf.first_non_empty_span(); @@ -12878,28 +14092,36 @@ inline size_t from_chars_first(csubstr buf, bool * C4_RESTRICT v) noexcept //----------------------------------------------------------------------------- // single-char implementation +/** @ingroup doc_to_chars */ inline size_t to_chars(substr buf, char v) noexcept { if(buf.len > 0) - buf[0] = v; + { + C4_XASSERT(buf.str); + buf.str[0] = v; + } return 1; } /** extract a single character from a substring - * @note to extract a string instead and not just a single character, use the csubstr overload */ + * @note to extract a string instead and not just a single character, use the csubstr overload + * @ingroup doc_from_chars + * */ inline bool from_chars(csubstr buf, char * C4_RESTRICT v) noexcept { if(buf.len != 1) return 
false; - *v = buf[0]; + C4_XASSERT(buf.str); + *v = buf.str[0]; return true; } +/** @ingroup doc_from_chars_first */ inline size_t from_chars_first(csubstr buf, char * C4_RESTRICT v) noexcept { if(buf.len < 1) return csubstr::npos; - *v = buf[0]; + *v = buf.str[0]; return 1; } @@ -12907,6 +14129,7 @@ inline size_t from_chars_first(csubstr buf, char * C4_RESTRICT v) noexcept //----------------------------------------------------------------------------- // csubstr implementation +/** @ingroup doc_to_chars */ inline size_t to_chars(substr buf, csubstr v) noexcept { C4_ASSERT(!buf.overlaps(v)); @@ -12923,12 +14146,14 @@ inline size_t to_chars(substr buf, csubstr v) noexcept return v.len; } +/** @ingroup doc_from_chars */ inline bool from_chars(csubstr buf, csubstr *C4_RESTRICT v) noexcept { *v = buf; return true; } +/** @ingroup doc_from_chars_first */ inline size_t from_chars_first(substr buf, csubstr * C4_RESTRICT v) noexcept { csubstr trimmed = buf.first_non_empty_span(); @@ -12942,6 +14167,7 @@ inline size_t from_chars_first(substr buf, csubstr * C4_RESTRICT v) noexcept //----------------------------------------------------------------------------- // substr +/** @ingroup doc_to_chars */ inline size_t to_chars(substr buf, substr v) noexcept { C4_ASSERT(!buf.overlaps(v)); @@ -12958,6 +14184,7 @@ inline size_t to_chars(substr buf, substr v) noexcept return v.len; } +/** @ingroup doc_from_chars */ inline bool from_chars(csubstr buf, substr * C4_RESTRICT v) noexcept { C4_ASSERT(!buf.overlaps(*v)); @@ -12979,6 +14206,7 @@ inline bool from_chars(csubstr buf, substr * C4_RESTRICT v) noexcept return false; } +/** @ingroup doc_from_chars_first */ inline size_t from_chars_first(csubstr buf, substr * C4_RESTRICT v) noexcept { csubstr trimmed = buf.first_non_empty_span(); @@ -13003,6 +14231,7 @@ inline size_t from_chars_first(csubstr buf, substr * C4_RESTRICT v) noexcept //----------------------------------------------------------------------------- +/** @ingroup doc_to_chars 
*/ template inline size_t to_chars(substr buf, const char (& C4_RESTRICT v)[N]) noexcept { @@ -13010,16 +14239,21 @@ inline size_t to_chars(substr buf, const char (& C4_RESTRICT v)[N]) noexcept return to_chars(buf, sp); } +/** @ingroup doc_to_chars */ inline size_t to_chars(substr buf, const char * C4_RESTRICT v) noexcept { return to_chars(buf, to_csubstr(v)); } +/** @} */ + } // namespace c4 #ifdef _MSC_VER # pragma warning(pop) -#elif defined(__clang__) +#endif + +#if defined(__clang__) # pragma clang diagnostic pop #elif defined(__GNUC__) # pragma GCC diagnostic pop @@ -13117,8 +14351,29 @@ size_t decode_code_point(uint8_t *C4_RESTRICT buf, size_t buflen, const uint32_t # pragma GCC diagnostic ignored "-Wuseless-cast" #endif +/** @defgroup doc_format_utils Format utilities + * + * @brief Provides generic and type-safe formatting/scanning utilities + * built on top of @ref doc_to_chars() and @ref doc_from_chars, + * forwarding the arguments to these functions, which in turn use the + * @ref doc_charconv utilities. Like @ref doc_charconv, the formatting + * facilities are very efficient and many times faster than printf(). 
+ * + * @see [a formatting sample in rapidyaml's docs](https://rapidyaml.readthedocs.io/latest/doxygen/group__doc__quickstart.html#gac2425b515eb552589708cfff70c52b14) + * */ + +/** @defgroup doc_format_specifiers Format specifiers + * + * @brief Format specifiers are tag types and functions that are used + * together with @ref doc_to_chars and @ref doc_from_chars + * + * @see [a formatting sample in rapidyaml's docs](https://rapidyaml.readthedocs.io/latest/doxygen/group__doc__quickstart.html#gac2425b515eb552589708cfff70c52b14) + * @ingroup doc_format_utils */ + namespace c4 { +/** @addtogroup doc_format_utils + * @{ */ //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- @@ -13127,6 +14382,12 @@ namespace c4 { namespace fmt { +/** @addtogroup doc_format_specifiers + * @{ */ + +/** @defgroup doc_boolean_specifiers boolean specifiers + * @{ */ + /** write a variable as an alphabetic boolean, ie as either true or false * @param strict_read */ template @@ -13143,9 +14404,15 @@ boolalpha_ boolalpha(T const& val, bool strict_read=false) return boolalpha_(val, strict_read); } +/** @} */ + +/** @} */ + } // namespace fmt -/** write a variable as an alphabetic boolean, ie as either true or false */ +/** write a variable as an alphabetic boolean, ie as either true or + * false + * @ingroup doc_to_chars */ template inline size_t to_chars(substr buf, fmt::boolalpha_ fmt) { @@ -13161,10 +14428,17 @@ inline size_t to_chars(substr buf, fmt::boolalpha_ fmt) namespace fmt { +/** @addtogroup doc_format_specifiers + * @{ */ + +/** @defgroup doc_integer_specifiers Integer specifiers + * @{ */ + /** format an integral type with a custom radix */ template struct integral_ { + C4_STATIC_ASSERT(std::is_integral::value); T val; T radix; C4_ALWAYS_INLINE integral_(T val_, T radix_) : val(val_), radix(radix_) {} @@ -13174,12 +14448,14 @@ struct integral_ template struct integral_padded_ 
{ + C4_STATIC_ASSERT(std::is_integral::value); T val; T radix; size_t num_digits; C4_ALWAYS_INLINE integral_padded_(T val_, T radix_, size_t nd) : val(val_), radix(radix_), num_digits(nd) {} }; + /** format an integral type with a custom radix */ template C4_ALWAYS_INLINE integral_ integral(T val, T radix=10) @@ -13198,34 +14474,6 @@ C4_ALWAYS_INLINE integral_ integral(std::nullptr_t, T radix=10) { return integral_(intptr_t(0), static_cast(radix)); } -/** pad the argument with zeroes on the left, with decimal radix */ -template -C4_ALWAYS_INLINE integral_padded_ zpad(T val, size_t num_digits) -{ - return integral_padded_(val, T(10), num_digits); -} -/** pad the argument with zeroes on the left */ -template -C4_ALWAYS_INLINE integral_padded_ zpad(integral_ val, size_t num_digits) -{ - return integral_padded_(val.val, val.radix, num_digits); -} -/** pad the argument with zeroes on the left */ -C4_ALWAYS_INLINE integral_padded_ zpad(std::nullptr_t, size_t num_digits) -{ - return integral_padded_(0, 16, num_digits); -} -/** pad the argument with zeroes on the left */ -template -C4_ALWAYS_INLINE integral_padded_ zpad(T const* val, size_t num_digits) -{ - return integral_padded_(reinterpret_cast(val), 16, num_digits); -} -template -C4_ALWAYS_INLINE integral_padded_ zpad(T * val, size_t num_digits) -{ - return integral_padded_(reinterpret_cast(val), 16, num_digits); -} /** format the pointer as an hexadecimal value */ @@ -13306,6 +14554,46 @@ inline integral_ bin(T v) return integral_(v, T(2)); } +/** @} */ // integer_specifiers + + +/** @defgroup doc_zpad Pad the number with zeroes on the left + * @{ */ + +/** pad the argument with zeroes on the left, with decimal radix */ +template +C4_ALWAYS_INLINE integral_padded_ zpad(T val, size_t num_digits) +{ + return integral_padded_(val, T(10), num_digits); +} +/** pad the argument with zeroes on the left */ +template +C4_ALWAYS_INLINE integral_padded_ zpad(integral_ val, size_t num_digits) +{ + return integral_padded_(val.val, 
val.radix, num_digits); +} +/** pad the argument with zeroes on the left */ +C4_ALWAYS_INLINE integral_padded_ zpad(std::nullptr_t, size_t num_digits) +{ + return integral_padded_(0, 16, num_digits); +} +/** pad the argument with zeroes on the left */ +template +C4_ALWAYS_INLINE integral_padded_ zpad(T const* val, size_t num_digits) +{ + return integral_padded_(reinterpret_cast(val), 16, num_digits); +} +template +C4_ALWAYS_INLINE integral_padded_ zpad(T * val, size_t num_digits) +{ + return integral_padded_(reinterpret_cast(val), 16, num_digits); +} + +/** @} */ // zpad + + +/** @defgroup doc_overflow_checked Check read for overflow + * @{ */ template struct overflow_checked_ @@ -13320,9 +14608,15 @@ C4_ALWAYS_INLINE overflow_checked_ overflow_checked(T &val) return overflow_checked_(val); } +/** @} */ // overflow_checked + +/** @} */ // format_specifiers + + } // namespace fmt -/** format an integral_ signed type */ +/** format an integer signed type + * @ingroup doc_to_chars */ template C4_ALWAYS_INLINE typename std::enable_if::value, size_t>::type @@ -13330,7 +14624,8 @@ to_chars(substr buf, fmt::integral_ fmt) { return itoa(buf, fmt.val, fmt.radix); } -/** format an integral_ signed type, pad with zeroes */ +/** format an integer signed type, pad with zeroes + * @ingroup doc_to_chars */ template C4_ALWAYS_INLINE typename std::enable_if::value, size_t>::type @@ -13339,7 +14634,8 @@ to_chars(substr buf, fmt::integral_padded_ fmt) return itoa(buf, fmt.val, fmt.radix, fmt.num_digits); } -/** format an integral_ unsigned type */ +/** format an integer unsigned type + * @ingroup doc_to_chars */ template C4_ALWAYS_INLINE typename std::enable_if::value, size_t>::type @@ -13347,7 +14643,8 @@ to_chars(substr buf, fmt::integral_ fmt) { return utoa(buf, fmt.val, fmt.radix); } -/** format an integral_ unsigned type, pad with zeroes */ +/** format an integer unsigned type, pad with zeroes + * @ingroup doc_to_chars */ template C4_ALWAYS_INLINE typename std::enable_if::value, 
size_t>::type @@ -13356,6 +14653,8 @@ to_chars(substr buf, fmt::integral_padded_ fmt) return utoa(buf, fmt.val, fmt.radix, fmt.num_digits); } +/** read an integer type, detecting overflow (returns false on overflow) + * @ingroup doc_from_chars */ template C4_ALWAYS_INLINE bool from_chars(csubstr s, fmt::overflow_checked_ wrapper) { @@ -13363,6 +14662,15 @@ C4_ALWAYS_INLINE bool from_chars(csubstr s, fmt::overflow_checked_ wrapper) return atox(s, wrapper.val); return false; } +/** read an integer type, detecting overflow (returns false on overflow) + * @ingroup doc_from_chars */ +template +C4_ALWAYS_INLINE bool from_chars(csubstr s, fmt::overflow_checked_ *wrapper) +{ + if(C4_LIKELY(!overflows(s))) + return atox(s, wrapper->val); + return false; +} //----------------------------------------------------------------------------- @@ -13372,6 +14680,12 @@ C4_ALWAYS_INLINE bool from_chars(csubstr s, fmt::overflow_checked_ wrapper) namespace fmt { +/** @addtogroup doc_format_specifiers + * @{ */ + +/** @defgroup doc_real_specifiers Real specifiers + * @{ */ + template struct real_ { @@ -13387,9 +14701,15 @@ real_ real(T val, int precision, RealFormat_e fmt=FTOA_FLOAT) return real_(val, precision, fmt); } +/** @} */ // real_specifiers + +/** @} */ // format_specifiers + } // namespace fmt +/** @ingroup doc_to_chars */ inline size_t to_chars(substr buf, fmt::real_< float> fmt) { return ftoa(buf, fmt.val, fmt.precision, fmt.fmt); } +/** @ingroup doc_to_chars */ inline size_t to_chars(substr buf, fmt::real_ fmt) { return dtoa(buf, fmt.val, fmt.precision, fmt.fmt); } @@ -13400,6 +14720,12 @@ inline size_t to_chars(substr buf, fmt::real_ fmt) { return dtoa(buf, fm namespace fmt { +/** @addtogroup doc_format_specifiers + * @{ */ + +/** @defgroup doc_raw_binary_specifiers Raw binary data + * @{ */ + /** @see blob_ */ template struct raw_wrapper_ : public blob_ @@ -13457,26 +14783,35 @@ inline raw_wrapper raw(T & C4_RESTRICT data, size_t alignment=alignof(T)) return 
raw_wrapper(blob(data), alignment); } +/** @} */ // raw_binary_specifiers + +/** @} */ // format_specifiers + } // namespace fmt -/** write a variable in raw binary format, using memcpy */ +/** write a variable in raw binary format, using memcpy + * @ingroup doc_to_chars */ C4CORE_EXPORT size_t to_chars(substr buf, fmt::const_raw_wrapper r); -/** read a variable in raw binary format, using memcpy */ +/** read a variable in raw binary format, using memcpy + * @ingroup doc_from_chars */ C4CORE_EXPORT bool from_chars(csubstr buf, fmt::raw_wrapper *r); -/** read a variable in raw binary format, using memcpy */ +/** read a variable in raw binary format, using memcpy + * @ingroup doc_from_chars */ inline bool from_chars(csubstr buf, fmt::raw_wrapper r) { return from_chars(buf, &r); } -/** read a variable in raw binary format, using memcpy */ +/** read a variable in raw binary format, using memcpy + * @ingroup doc_from_chars_first */ inline size_t from_chars_first(csubstr buf, fmt::raw_wrapper *r) { return from_chars(buf, r); } -/** read a variable in raw binary format, using memcpy */ +/** read a variable in raw binary format, using memcpy + * @ingroup doc_from_chars_first */ inline size_t from_chars_first(csubstr buf, fmt::raw_wrapper r) { return from_chars(buf, &r); @@ -13490,6 +14825,12 @@ inline size_t from_chars_first(csubstr buf, fmt::raw_wrapper r) namespace fmt { +/** @addtogroup doc_format_specifiers + * @{ */ + +/** @defgroup doc_alignment_specifiers Alignment specifiers + * @{ */ + template struct left_ { @@ -13522,9 +14863,14 @@ right_ right(T val, size_t width, char padchar=' ') return right_(val, width, padchar); } +/** @} */ // alignment_specifiers + +/** @} */ // format_specifiers + } // namespace fmt +/** @ingroup doc_to_chars */ template size_t to_chars(substr buf, fmt::left_ const& C4_RESTRICT align) { @@ -13536,6 +14882,7 @@ size_t to_chars(substr buf, fmt::left_ const& C4_RESTRICT align) return align.width; } +/** @ingroup doc_to_chars */ template 
size_t to_chars(substr buf, fmt::right_ const& C4_RESTRICT align) { @@ -13553,13 +14900,16 @@ size_t to_chars(substr buf, fmt::right_ const& C4_RESTRICT align) //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- -/// @cond dev +/** @defgroup doc_cat cat: concatenate arguments to string + * @{ */ + +/** @cond dev */ // terminates the variadic recursion inline size_t cat(substr /*buf*/) { return 0; } -/// @endcond +/** @endcond */ /** serialize the arguments, concatenating them to the given fixed-size buffer. @@ -13587,16 +14937,22 @@ substr cat_sub(substr buf, Args && ...args) return {buf.str, sz <= buf.len ? sz : buf.len}; } +/** @} */ + //----------------------------------------------------------------------------- -/// @cond dev + +/** @defgroup doc_uncat uncat: read concatenated arguments from string + * @{ */ + +/** @cond dev */ // terminates the variadic recursion inline size_t uncat(csubstr /*buf*/) { return 0; } -/// @endcond +/** @endcond */ /** deserialize the arguments from the given buffer. 
@@ -13617,16 +14973,22 @@ size_t uncat(csubstr buf, Arg & C4_RESTRICT a, Args & C4_RESTRICT ...more) return out + num; } +/** @} */ + //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- -namespace detail { +/** @defgroup doc_catsep catsep: cat arguments to string with separator + * @{ */ + +/** @cond dev */ +namespace detail { template -inline size_t catsep_more(substr /*buf*/, Sep const& C4_RESTRICT /*sep*/) +C4_ALWAYS_INLINE size_t catsep_more(substr /*buf*/, Sep const& C4_RESTRICT /*sep*/) { return 0; } @@ -13634,7 +14996,8 @@ inline size_t catsep_more(substr /*buf*/, Sep const& C4_RESTRICT /*sep*/) template size_t catsep_more(substr buf, Sep const& C4_RESTRICT sep, Arg const& C4_RESTRICT a, Args const& C4_RESTRICT ...more) { - size_t ret = to_chars(buf, sep), num = ret; + size_t ret = to_chars(buf, sep); + size_t num = ret; buf = buf.len >= ret ? buf.sub(ret) : substr{}; ret = to_chars(buf, a); num += ret; @@ -13644,6 +15007,7 @@ size_t catsep_more(substr buf, Sep const& C4_RESTRICT sep, Arg const& C4_RESTRIC return num; } + template inline size_t uncatsep_more(csubstr /*buf*/, Sep & /*sep*/) { @@ -13653,7 +15017,8 @@ inline size_t uncatsep_more(csubstr /*buf*/, Sep & /*sep*/) template size_t uncatsep_more(csubstr buf, Sep & C4_RESTRICT sep, Arg & C4_RESTRICT a, Args & C4_RESTRICT ...more) { - size_t ret = from_chars_first(buf, &sep), num = ret; + size_t ret = from_chars_first(buf, &sep); + size_t num = ret; if(C4_UNLIKELY(ret == csubstr::npos)) return csubstr::npos; buf = buf.len >= ret ? 
buf.sub(ret) : substr{}; @@ -13671,6 +15036,13 @@ size_t uncatsep_more(csubstr buf, Sep & C4_RESTRICT sep, Arg & C4_RESTRICT a, Ar } // namespace detail +template +size_t catsep(substr /*buf*/, Sep const& C4_RESTRICT /*sep*/) +{ + return 0; +} +/** @endcond */ + /** serialize the arguments, concatenating them to the given fixed-size * buffer, using a separator between each argument. @@ -13699,6 +15071,23 @@ substr catsep_sub(substr buf, Args && ...args) return {buf.str, sz <= buf.len ? sz : buf.len}; } +/** @} */ + + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +/** @defgroup doc_uncatsep uncatsep: deserialize the separated arguments from a string + * @{ */ + +/** deserialize the arguments from the given buffer. + * + * @return the number of characters read from the buffer, or csubstr::npos + * if a conversion was not successful. + * @see c4::cat(). c4::uncat() is the inverse of c4::cat(). */ + /** deserialize the arguments from the given buffer, using a separator. * * @return the number of characters read from the buffer, or csubstr::npos @@ -13718,11 +15107,16 @@ size_t uncatsep(csubstr buf, Sep & C4_RESTRICT sep, Arg & C4_RESTRICT a, Args & return num; } +/** @} */ + //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- +/** @defgroup doc_format format: formatted string interpolation + * @{ */ + /// @cond dev // terminates the variadic recursion inline size_t format(substr buf, csubstr fmt) @@ -13775,9 +15169,14 @@ substr format_sub(substr buf, csubstr fmt, Args const& C4_RESTRICT ...args) return {buf.str, sz <= buf.len ? 
sz : buf.len}; } +/** @} */ + //----------------------------------------------------------------------------- +/** @defgroup doc_unformat unformat: formatted read from string + * @{ */ + /// @cond dev // terminates the variadic recursion inline size_t unformat(csubstr /*buf*/, csubstr fmt) @@ -13812,26 +15211,18 @@ size_t unformat(csubstr buf, csubstr fmt, Arg & C4_RESTRICT a, Args & C4_RESTRIC return out; } +/** @} */ + //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- -/** a tag type for marking append to container - * @see c4::catrs() */ -struct append_t {}; - -/** a tag variable - * @see c4::catrs() */ -constexpr const append_t append = {}; - - -//----------------------------------------------------------------------------- - -/** like c4::cat(), but receives a container, and resizes it as needed to contain - * the result. The container is overwritten. To append to it, use the append - * overload. - * @see c4::cat() */ +/** cat+resize: like c4::cat(), but receives a container, and resizes + * it as needed to contain the result. The container is + * overwritten. To append to it, use the append overload. + * @see c4::cat() + * @ingroup doc_cat */ template inline void catrs(CharOwningContainer * C4_RESTRICT cont, Args const& C4_RESTRICT ...args) { @@ -13843,9 +15234,10 @@ inline void catrs(CharOwningContainer * C4_RESTRICT cont, Args const& C4_RESTRIC goto retry; } -/** like c4::cat(), but creates and returns a new container sized as needed to contain - * the result. - * @see c4::cat() */ +/** cat+resize: like c4::cat(), but creates and returns a new + * container sized as needed to contain the result. 
+ * @see c4::cat() + * @ingroup doc_cat */ template inline CharOwningContainer catrs(Args const& C4_RESTRICT ...args) { @@ -13854,13 +15246,16 @@ inline CharOwningContainer catrs(Args const& C4_RESTRICT ...args) return cont; } -/** like c4::cat(), but receives a container, and appends to it instead of - * overwriting it. The container is resized as needed to contain the result. +/** cat+resize+append: like c4::cat(), but receives a container, and + * appends to it instead of overwriting it. The container is resized + * as needed to contain the result. + * * @return the region newly appended to the original container * @see c4::cat() - * @see c4::catrs() */ + * @see c4::catrs() + * @ingroup doc_cat */ template -inline csubstr catrs(append_t, CharOwningContainer * C4_RESTRICT cont, Args const& C4_RESTRICT ...args) +inline csubstr catrs_append(CharOwningContainer * C4_RESTRICT cont, Args const& C4_RESTRICT ...args) { const size_t pos = cont->size(); retry: @@ -13875,19 +15270,12 @@ inline csubstr catrs(append_t, CharOwningContainer * C4_RESTRICT cont, Args cons //----------------------------------------------------------------------------- -/// @cond dev -// terminates the recursion -template -inline void catseprs(CharOwningContainer * C4_RESTRICT, Sep const& C4_RESTRICT) -{ - return; -} -/// @end cond - - -/** like c4::catsep(), but receives a container, and resizes it as needed to contain the result. - * The container is overwritten. To append to the container use the append overload. - * @see c4::catsep() */ +/** catsep+resize: like c4::catsep(), but receives a container, and + * resizes it as needed to contain the result. The container is + * overwritten. To append to the container use the append overload. 
+ * + * @see c4::catsep() + * @ingroup doc_catsep */ template inline void catseprs(CharOwningContainer * C4_RESTRICT cont, Sep const& C4_RESTRICT sep, Args const& C4_RESTRICT ...args) { @@ -13899,8 +15287,11 @@ inline void catseprs(CharOwningContainer * C4_RESTRICT cont, Sep const& C4_RESTR goto retry; } -/** like c4::catsep(), but create a new container with the result. - * @return the requested container */ +/** catsep+resize: like c4::catsep(), but create a new container with + * the result. + * + * @return the requested container + * @ingroup doc_catsep */ template inline CharOwningContainer catseprs(Sep const& C4_RESTRICT sep, Args const& C4_RESTRICT ...args) { @@ -13910,22 +15301,15 @@ inline CharOwningContainer catseprs(Sep const& C4_RESTRICT sep, Args const& C4_R } -/// @cond dev -// terminates the recursion -template -inline csubstr catseprs(append_t, CharOwningContainer * C4_RESTRICT, Sep const& C4_RESTRICT) -{ - csubstr s; - return s; -} -/// @endcond - -/** like catsep(), but receives a container, and appends the arguments, resizing the - * container as needed to contain the result. The buffer is appended to. +/** catsep+resize+append: like catsep(), but receives a container, and + * appends the arguments, resizing the container as needed to contain + * the result. The buffer is appended to. 
+ * * @return a csubstr of the appended part - * @ingroup formatting_functions */ + * @ingroup formatting_functions + * @ingroup doc_catsep */ template -inline csubstr catseprs(append_t, CharOwningContainer * C4_RESTRICT cont, Sep const& C4_RESTRICT sep, Args const& C4_RESTRICT ...args) +inline csubstr catseprs_append(CharOwningContainer * C4_RESTRICT cont, Sep const& C4_RESTRICT sep, Args const& C4_RESTRICT ...args) { const size_t pos = cont->size(); retry: @@ -13940,10 +15324,12 @@ inline csubstr catseprs(append_t, CharOwningContainer * C4_RESTRICT cont, Sep co //----------------------------------------------------------------------------- -/** like c4::format(), but receives a container, and resizes it as needed - * to contain the result. The container is overwritten. To append to - * the container use the append overload. - * @see c4::format() */ +/** format+resize: like c4::format(), but receives a container, and + * resizes it as needed to contain the result. The container is + * overwritten. To append to the container use the append overload. + * + * @see c4::format() + * @ingroup doc_format */ template inline void formatrs(CharOwningContainer * C4_RESTRICT cont, csubstr fmt, Args const& C4_RESTRICT ...args) { @@ -13955,8 +15341,11 @@ inline void formatrs(CharOwningContainer * C4_RESTRICT cont, csubstr fmt, Args c goto retry; } -/** like c4::format(), but create a new container with the result. - * @return the requested container */ +/** format+resize: like c4::format(), but create a new container with + * the result. 
+ * + * @return the requested container + * @ingroup doc_format */ template inline CharOwningContainer formatrs(csubstr fmt, Args const& C4_RESTRICT ...args) { @@ -13965,13 +15354,14 @@ inline CharOwningContainer formatrs(csubstr fmt, Args const& C4_RESTRICT ...args return cont; } -/** like format(), but receives a container, and appends the +/** format+resize+append: like format(), but receives a container, and appends the * arguments, resizing the container as needed to contain the * result. The buffer is appended to. * @return the region newly appended to the original container - * @ingroup formatting_functions */ + * @ingroup formatting_functions + * @ingroup doc_format */ template -inline csubstr formatrs(append_t, CharOwningContainer * C4_RESTRICT cont, csubstr fmt, Args const& C4_RESTRICT ...args) +inline csubstr formatrs_append(CharOwningContainer * C4_RESTRICT cont, csubstr fmt, Args const& C4_RESTRICT ...args) { const size_t pos = cont->size(); retry: @@ -13983,6 +15373,8 @@ inline csubstr formatrs(append_t, CharOwningContainer * C4_RESTRICT cont, csubst return to_csubstr(*cont).range(pos, cont->size()); } +/** @} */ + } // namespace c4 #ifdef _MSC_VER @@ -14020,6 +15412,8 @@ inline csubstr formatrs(append_t, CharOwningContainer * C4_RESTRICT cont, csubst namespace c4 { +C4_SUPPRESS_WARNING_GCC_CLANG_WITH_PUSH("-Wold-style-cast") + //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- @@ -14380,6 +15774,8 @@ C4_ALWAYS_INLINE DumpResults catsep_dump_resume(DumperFn &&dumpfn, substr buf, S //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- +/// @cond dev + /** take the function pointer as a function argument */ template C4_ALWAYS_INLINE size_t format_dump(DumperFn &&dumpfn, substr buf, csubstr fmt) @@ -14391,7 +15787,7 @@ C4_ALWAYS_INLINE size_t 
format_dump(DumperFn &&dumpfn, substr buf, csubstr fmt) return 0u; } -/** take the function pointer as a function argument */ +/** take the function pointer as a template argument */ template C4_ALWAYS_INLINE size_t format_dump(substr buf, csubstr fmt) { @@ -14402,9 +15798,12 @@ C4_ALWAYS_INLINE size_t format_dump(substr buf, csubstr fmt) return 0u; } +/// @endcond + + /** take the function pointer as a function argument */ template -size_t format_dump(DumperFn &&dumpfn, substr buf, csubstr fmt, Arg const& C4_RESTRICT a, Args const& C4_RESTRICT ...more) +C4_NO_INLINE size_t format_dump(DumperFn &&dumpfn, substr buf, csubstr fmt, Arg const& C4_RESTRICT a, Args const& C4_RESTRICT ...more) { // we can dump without using buf // but we'll only dump if the buffer is ok @@ -14427,7 +15826,7 @@ size_t format_dump(DumperFn &&dumpfn, substr buf, csubstr fmt, Arg const& C4_RES /** take the function pointer as a template argument */ template -size_t format_dump(substr buf, csubstr fmt, Arg const& C4_RESTRICT a, Args const& C4_RESTRICT ...more) +C4_NO_INLINE size_t format_dump(substr buf, csubstr fmt, Arg const& C4_RESTRICT a, Args const& C4_RESTRICT ...more) { // we can dump without using buf // but we'll only dump if the buffer is ok @@ -14587,6 +15986,7 @@ C4_ALWAYS_INLINE DumpResults format_dump_resume(DumperFn &&dumpfn, substr buf, c return detail::format_dump_resume(0u, dumpfn, DumpResults{}, buf, fmt, more...); } +C4_SUPPRESS_WARNING_GCC_CLANG_POP } // namespace c4 @@ -14624,6 +16024,8 @@ C4_ALWAYS_INLINE DumpResults format_dump_resume(DumperFn &&dumpfn, substr buf, c namespace c4 { +C4_SUPPRESS_WARNING_GCC_CLANG_WITH_PUSH("-Wold-style-cast") + //! 
taken from http://stackoverflow.com/questions/15586163/c11-type-trait-to-differentiate-between-enum-class-and-regular-enum template using is_scoped_enum = std::integral_constant::value && !std::is_convertible::value>; @@ -14751,7 +16153,6 @@ size_t eoffs(EnumOffsetType which) } default: C4_ERROR("unknown offset type %d", (int)which); - return 0; } } @@ -14885,6 +16286,8 @@ const char* EnumSymbols::Sym::name_offs(EnumOffsetType t) const return name + eoffs(t); } +C4_SUPPRESS_WARNING_GCC_CLANG_POP + } // namespace c4 #endif // _C4_ENUM_HPP_ @@ -14926,12 +16329,17 @@ const char* EnumSymbols::Sym::name_offs(EnumOffsetType t) const #endif /* C4_FORMAT_HPP_ */ -#ifdef _MSC_VER +#if defined(_MSC_VER) # pragma warning(push) # pragma warning(disable : 4996) // 'strncpy', fopen, etc: This function or variable may be unsafe -#elif defined(__clang__) +#endif + +#if defined(__clang__) +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wold-style-cast" #elif defined(__GNUC__) # pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wold-style-cast" # if __GNUC__ >= 8 # pragma GCC diagnostic ignored "-Wstringop-truncation" # pragma GCC diagnostic ignored "-Wstringop-overflow" @@ -15153,7 +16561,7 @@ typename std::underlying_type::type str2bm_read_one(const char *str, size_ C4_CHECK_MSG(p != nullptr, "no valid enum pair name for '%.*s'", (int)sz, str); return static_cast(p->value); } - I tmp; + I tmp{0}; size_t len = uncat(csubstr(str, sz), tmp); C4_CHECK_MSG(len != csubstr::npos, "could not read string as an integral type: '%.*s'", (int)sz, str); return tmp; @@ -15239,7 +16647,10 @@ typename std::underlying_type::type str2bm(const char *str) #ifdef _MSC_VER # pragma warning(pop) -#elif defined(__clang__) +#endif + +#if defined(__clang__) +# pragma clang diagnostic pop #elif defined(__GNUC__) # pragma GCC diagnostic pop #endif @@ -15290,6 +16701,8 @@ typename std::underlying_type::type str2bm(const char *str) namespace c4 { 
+C4_SUPPRESS_WARNING_GCC_CLANG_WITH_PUSH("-Wold-style-cast") + //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- @@ -15357,8 +16770,8 @@ class span_crtp C4_ALWAYS_INLINE const_iterator cend() const noexcept { return _c4cptr + _c4csz; } C4_ALWAYS_INLINE reverse_iterator rbegin() noexcept { return reverse_iterator(_c4ptr + _c4sz); } - C4_ALWAYS_INLINE const_reverse_iterator rbegin() const noexcept { return reverse_iterator(_c4cptr + _c4sz); } - C4_ALWAYS_INLINE const_reverse_iterator crbegin() const noexcept { return reverse_iterator(_c4cptr + _c4sz); } + C4_ALWAYS_INLINE const_reverse_iterator rbegin() const noexcept { return reverse_iterator(_c4cptr + _c4csz); } + C4_ALWAYS_INLINE const_reverse_iterator crbegin() const noexcept { return reverse_iterator(_c4cptr + _c4csz); } C4_ALWAYS_INLINE reverse_iterator rend() noexcept { return const_reverse_iterator(_c4ptr); } C4_ALWAYS_INLINE const_reverse_iterator rend() const noexcept { return const_reverse_iterator(_c4cptr); } @@ -15789,6 +17202,7 @@ class spanrsl : public span_crtp> }; template using cspanrsl = spanrsl; +C4_SUPPRESS_WARNING_GCC_CLANG_POP } // namespace c4 @@ -15819,6 +17233,13 @@ template using cspanrsl = spanrsl; #error "amalgamate: file c4/span.hpp must have been included at this point" #endif /* C4_SPAN_HPP_ */ +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/compiler.hpp +//#include "c4/compiler.hpp" +#if !defined(C4_COMPILER_HPP_) && !defined(_C4_COMPILER_HPP_) +#error "amalgamate: file c4/compiler.hpp must have been included at this point" +#endif /* C4_COMPILER_HPP_ */ + /// @cond dev struct _c4t @@ -15851,31 +17272,32 @@ C4_CONSTEXPR14 cspan type_name() { const _c4t p = _c4tn(); -#if (0) // _C4_THIS_IS_A_DEBUG_SCAFFOLD +#if (0) // enable this to debug and find the offsets 
for(size_t index = 0; index < p.sz; ++index) - { printf(" %2c", p.str[index]); - } printf("\n"); for(size_t index = 0; index < p.sz; ++index) - { - printf(" %2d", (int)index); - } + printf(" %2zu", index); printf("\n"); #endif #if defined(_MSC_VER) # if defined(__clang__) // Visual Studio has the clang toolset +# if (_MSC_VER >= 1930) // do not use this: defined(C4_MSVC_2022) + // ..............................xxx. + // _c4t __cdecl _c4tn(void) [T = int] + enum : size_t { tstart = 30, tend = 1}; +# else // example: // ..........................xxx. // _c4t __cdecl _c4tn() [T = int] enum : size_t { tstart = 26, tend = 1}; - +# endif # elif defined(C4_MSVC_2015) || defined(C4_MSVC_2017) || defined(C4_MSVC_2019) || defined(C4_MSVC_2022) // Note: subtract 7 at the end because the function terminates with ">(void)" in VS2015+ cspan::size_type tstart = 26, tend = 7; - const char *s = p.str + tstart; // look at the start + const char *C4_RESTRICT s = p.str + tstart; // look at the start // we're not using strcmp() or memcmp() to spare the #include @@ -15976,27 +17398,62 @@ C4_CONSTEXPR14 C4_ALWAYS_INLINE cspan type_name(T const&) namespace c4 { +/** @defgroup doc_base64 Base64 encoding/decoding + * @see https://en.wikipedia.org/wiki/Base64 + * @see https://www.base64encode.org/ + * @{ */ + /** check that the given buffer is a valid base64 encoding * @see https://en.wikipedia.org/wiki/Base64 */ -bool base64_valid(csubstr encoded); +C4CORE_EXPORT bool base64_valid(csubstr encoded); + /** base64-encode binary data. * @param encoded [out] output buffer for encoded data * @param data [in] the input buffer with the binary data - * @return the number of bytes needed to return the output. No writes occur beyond the end of the output buffer. + * + * @return the number of bytes needed to return the output (ie the + * required size for @p encoded). 
No writes occur beyond the end of + * the output buffer, so it is safe to do a speculative call where the + * encoded buffer is empty, or maybe too small. The caller should + * ensure that the returned size is smaller than the size of the + * encoded buffer. + * + * @note the result depends on endianness. If transfer between + * little/big endian systems is desired, the caller should normalize + * @p data before encoding. + * * @see https://en.wikipedia.org/wiki/Base64 */ -size_t base64_encode(substr encoded, cblob data); +C4CORE_EXPORT size_t base64_encode(substr encoded, cblob data); + /** decode the base64 encoding in the given buffer * @param encoded [in] the encoded base64 * @param data [out] the output buffer - * @return the number of bytes needed to return the output.. No writes occur beyond the end of the output buffer. + * + * @return the number of bytes needed to return the output (ie the + * required size for @p data). No writes occur beyond the end of the + * output buffer, so it is safe to do a speculative call where the + * data buffer is empty, or maybe too small. The caller should ensure + * that the returned size is smaller than the size of the data buffer. + * + * @note the result depends on endianness. If transfer between + * little/big endian systems is desired, the caller should normalize + * @p data after decoding. 
+ * * @see https://en.wikipedia.org/wiki/Base64 */ -size_t base64_decode(csubstr encoded, blob data); +C4CORE_EXPORT size_t base64_decode(csubstr encoded, blob data); +/** @} */ // base64 namespace fmt { +/** @addtogroup doc_format_specifiers + * @{ */ + +/** @defgroup doc_base64_fmt Base64 + * @{ */ + template struct base64_wrapper_ { @@ -16004,7 +17461,9 @@ struct base64_wrapper_ base64_wrapper_() : data() {} base64_wrapper_(blob_ blob) : data(blob) {} }; +/** a tag type to mark a payload as base64-encoded */ using const_base64_wrapper = base64_wrapper_; +/** a tag type to mark a payload to be encoded as base64 */ using base64_wrapper = base64_wrapper_; @@ -16043,16 +17502,22 @@ C4_ALWAYS_INLINE base64_wrapper base64(substr s) return base64_wrapper(blob(s.str, s.len)); } +/** @} */ // base64_fmt + +/** @} */ // format_specifiers + } // namespace fmt -/** write a variable in base64 format */ +/** write a variable in base64 format + * @ingroup doc_to_chars */ inline size_t to_chars(substr buf, fmt::const_base64_wrapper b) { return base64_encode(buf, b.data); } -/** read a variable in base64 format */ +/** read a variable in base64 format + * @ingroup doc_from_chars */ inline size_t from_chars(csubstr buf, fmt::base64_wrapper *b) { return base64_decode(buf, b->data); @@ -16104,7 +17569,7 @@ namespace c4 { C4_ALWAYS_INLINE c4::substr to_substr(std::string &s) noexcept { #if C4_CPP < 11 - #error this function will do undefined behavior + #error this function will have undefined behavior #endif // since c++11 it is legal to call s[s.size()]. return c4::substr(&s[0], s.size()); @@ -16118,7 +17583,7 @@ C4_ALWAYS_INLINE c4::substr to_substr(std::string &s) noexcept C4_ALWAYS_INLINE c4::csubstr to_csubstr(std::string const& s) noexcept { #if C4_CPP < 11 - #error this function will do undefined behavior + #error this function will have undefined behavior #endif // since c++11 it is legal to call s[s.size()]. 
return c4::csubstr(&s[0], s.size()); @@ -16184,6 +17649,102 @@ inline bool from_chars(c4::csubstr buf, std::string * s) +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/std/string_view.hpp +// https://github.com/biojppm/c4core/src/c4/std/string_view.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4_STD_STRING_VIEW_HPP_ +#define _C4_STD_STRING_VIEW_HPP_ + +/** @file string_view.hpp */ + +#ifndef C4CORE_SINGLE_HEADER +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/language.hpp +//#include "c4/language.hpp" +#if !defined(C4_LANGUAGE_HPP_) && !defined(_C4_LANGUAGE_HPP_) +#error "amalgamate: file c4/language.hpp must have been included at this point" +#endif /* C4_LANGUAGE_HPP_ */ + +#endif + +#if (C4_CPP >= 17 && defined(__cpp_lib_string_view)) || defined(__DOXYGEN__) + +#ifndef C4CORE_SINGLE_HEADER +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/substr.hpp +//#include "c4/substr.hpp" +#if !defined(C4_SUBSTR_HPP_) && !defined(_C4_SUBSTR_HPP_) +#error "amalgamate: file c4/substr.hpp must have been included at this point" +#endif /* C4_SUBSTR_HPP_ */ + +#endif + +#include + + +namespace c4 { + +//----------------------------------------------------------------------------- + +/** create a csubstr from an existing std::string_view. 
*/ +C4_ALWAYS_INLINE c4::csubstr to_csubstr(std::string_view s) noexcept +{ + return c4::csubstr(s.data(), s.size()); +} + + +//----------------------------------------------------------------------------- + +C4_ALWAYS_INLINE bool operator== (c4::csubstr ss, std::string_view s) { return ss.compare(s.data(), s.size()) == 0; } +C4_ALWAYS_INLINE bool operator!= (c4::csubstr ss, std::string_view s) { return ss.compare(s.data(), s.size()) != 0; } +C4_ALWAYS_INLINE bool operator>= (c4::csubstr ss, std::string_view s) { return ss.compare(s.data(), s.size()) >= 0; } +C4_ALWAYS_INLINE bool operator> (c4::csubstr ss, std::string_view s) { return ss.compare(s.data(), s.size()) > 0; } +C4_ALWAYS_INLINE bool operator<= (c4::csubstr ss, std::string_view s) { return ss.compare(s.data(), s.size()) <= 0; } +C4_ALWAYS_INLINE bool operator< (c4::csubstr ss, std::string_view s) { return ss.compare(s.data(), s.size()) < 0; } + +C4_ALWAYS_INLINE bool operator== (std::string_view s, c4::csubstr ss) { return ss.compare(s.data(), s.size()) == 0; } +C4_ALWAYS_INLINE bool operator!= (std::string_view s, c4::csubstr ss) { return ss.compare(s.data(), s.size()) != 0; } +C4_ALWAYS_INLINE bool operator<= (std::string_view s, c4::csubstr ss) { return ss.compare(s.data(), s.size()) >= 0; } +C4_ALWAYS_INLINE bool operator< (std::string_view s, c4::csubstr ss) { return ss.compare(s.data(), s.size()) > 0; } +C4_ALWAYS_INLINE bool operator>= (std::string_view s, c4::csubstr ss) { return ss.compare(s.data(), s.size()) <= 0; } +C4_ALWAYS_INLINE bool operator> (std::string_view s, c4::csubstr ss) { return ss.compare(s.data(), s.size()) < 0; } + + +//----------------------------------------------------------------------------- + +/** copy an std::string_view to a writeable substr */ +inline size_t to_chars(c4::substr buf, std::string_view s) +{ + C4_ASSERT(!buf.overlaps(to_csubstr(s))); + size_t sz = s.size(); + size_t len = buf.len < sz ? 
buf.len : sz; + // calling memcpy with null strings is undefined behavior + // and will wreak havoc in calling code's branches. + // see https://github.com/biojppm/rapidyaml/pull/264#issuecomment-1262133637 + if(len) + { + C4_ASSERT(s.data() != nullptr); + C4_ASSERT(buf.str != nullptr); + memcpy(buf.str, s.data(), len); + } + return sz; // return the number of needed chars +} + +} // namespace c4 + +#endif // C4_STRING_VIEW_AVAILABLE + +#endif // _C4_STD_STRING_VIEW_HPP_ + + +// (end https://github.com/biojppm/c4core/src/c4/std/string_view.hpp) + + + //******************************************************************************** //-------------------------------------------------------------------------------- // src/c4/std/vector.hpp @@ -16514,10 +18075,17 @@ inline size_t unformat(csubstr buf, csubstr fmt, std::tuple< Types... > & tp) #include +#ifdef __clang__ +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wold-style-cast" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wold-style-cast" +#endif + namespace c4 { namespace rng { - class splitmix { public: @@ -16693,6 +18261,12 @@ inline bool operator!=(pcg const &lhs, pcg const &rhs) } // namespace rng } // namespace c4 +#ifdef __clang__ +# pragma clang diagnostic pop +#elif defined(__GNUC__) +# pragma GCC diagnostic pop +#endif + #endif /* AG_RANDOM_H */ @@ -16740,6 +18314,8 @@ inline bool operator!=(pcg const &lhs, pcg const &rhs) //included above: //#include #include +//included above: +//#include namespace stdext { @@ -16792,7 +18368,15 @@ template struct vtable explicit constexpr vtable() noexcept : invoke_ptr{ [](storage_ptr_t, Args&&...) 
-> R - { throw std::bad_function_call(); } + { + #if (defined(_MSC_VER) && (defined(_CPPUNWIND) && (__CPPUNWIND == 1))) \ + || \ + (defined(__EXCEPTIONS) || defined(__cpp_exceptions)) + throw std::bad_function_call(); + #else + std::abort(); + #endif + } }, copy_ptr{ [](storage_ptr_t, storage_ptr_t) noexcept -> void {} }, move_ptr{ [](storage_ptr_t, storage_ptr_t) noexcept -> void {} }, @@ -17122,9 +18706,11 @@ void foo() {} // to avoid empty file warning from the linker #ifdef __clang__ # pragma clang diagnostic push # pragma clang diagnostic ignored "-Wformat-nonliteral" +# pragma clang diagnostic ignored "-Wold-style-cast" #elif defined(__GNUC__) # pragma GCC diagnostic push # pragma GCC diagnostic ignored "-Wformat-nonliteral" +# pragma GCC diagnostic ignored "-Wold-style-cast" #endif namespace c4 { @@ -17153,13 +18739,19 @@ size_t to_chars(substr buf, fmt::const_raw_wrapper r) bool from_chars(csubstr buf, fmt::raw_wrapper *r) { + C4_SUPPRESS_WARNING_GCC_WITH_PUSH("-Wcast-qual") void * vptr = (void*)buf.str; + C4_SUPPRESS_WARNING_GCC_POP size_t space = buf.len; auto ptr = (decltype(buf.str)) std::align(r->alignment, r->len, vptr, space); C4_CHECK(ptr != nullptr); C4_CHECK(ptr >= buf.begin() && ptr <= buf.end()); - //size_t dim = (ptr - buf.str) + r->len; + C4_SUPPRESS_WARNING_GCC_PUSH + #if defined(__GNUC__) && __GNUC__ > 9 + C4_SUPPRESS_WARNING_GCC("-Wanalyzer-null-argument") + #endif memcpy(r->buf, ptr, r->len); + C4_SUPPRESS_WARNING_GCC_POP return true; } @@ -17204,13 +18796,14 @@ bool from_chars(csubstr buf, fmt::raw_wrapper *r) namespace c4 { + /** Fills 'dest' with the first 'pattern_size' bytes at 'pattern', 'num_times'. */ void mem_repeat(void* dest, void const* pattern, size_t pattern_size, size_t num_times) { if(C4_UNLIKELY(num_times == 0)) return; C4_ASSERT( ! 
mem_overlaps(dest, pattern, num_times*pattern_size, pattern_size)); - char *begin = (char*)dest; + char *begin = static_cast(dest); char *end = begin + num_times * pattern_size; // copy the pattern once ::memcpy(begin, pattern, pattern_size); @@ -17228,6 +18821,7 @@ void mem_repeat(void* dest, void const* pattern, size_t pattern_size, size_t num } } + } // namespace c4 #endif /* C4CORE_SINGLE_HDR_DEFINE_NOW */ @@ -17308,6 +18902,8 @@ constexpr const size_t char_traits< wchar_t >::num_whitespace_chars; namespace c4 { +C4_SUPPRESS_WARNING_GCC_CLANG_WITH_PUSH("-Wold-style-cast") + namespace detail { @@ -17330,38 +18926,38 @@ void afree_impl(void *ptr) void* aalloc_impl(size_t size, size_t alignment) { + // alignment must be nonzero and a power of 2 + C4_CHECK(alignment > 0 && (alignment & (alignment - 1u)) == 0); + // NOTE: alignment needs to be sized in multiples of sizeof(void*) + if(C4_UNLIKELY(alignment < sizeof(void*))) + alignment = sizeof(void*); + static_assert((sizeof(void*) & (sizeof(void*)-1u)) == 0, "sizeof(void*) must be a power of 2"); + C4_CHECK(((alignment & (sizeof(void*) - 1u))) == 0u); void *mem; #if defined(C4_WIN) || defined(C4_XBOX) mem = ::_aligned_malloc(size, alignment); C4_CHECK(mem != nullptr || size == 0); -#elif defined(C4_ARM) - // https://stackoverflow.com/questions/53614538/undefined-reference-to-posix-memalign-in-arm-gcc - // https://electronics.stackexchange.com/questions/467382/e2-studio-undefined-reference-to-posix-memalign/467753 - mem = memalign(alignment, size); - C4_CHECK(mem != nullptr || size == 0); #elif defined(C4_POSIX) || defined(C4_IOS) || defined(C4_MACOS) - // NOTE: alignment needs to be sized in multiples of sizeof(void*) - size_t amult = alignment; - if(C4_UNLIKELY(alignment < sizeof(void*))) - { - amult = sizeof(void*); - } - int ret = ::posix_memalign(&mem, amult, size); + int ret = ::posix_memalign(&mem, alignment, size); if(C4_UNLIKELY(ret)) { - if(ret == EINVAL) - { - C4_ERROR("The alignment argument %zu was not 
a power of two, " - "or was not a multiple of sizeof(void*)", alignment); - } - else if(ret == ENOMEM) + C4_ASSERT(ret != EINVAL); // this was already handled above + if(ret == ENOMEM) { C4_ERROR("There was insufficient memory to fulfill the " "allocation request of %zu bytes (alignment=%lu)", size, size); } return nullptr; } +#elif defined(C4_ARM) || defined(C4_ANDROID) + // https://stackoverflow.com/questions/53614538/undefined-reference-to-posix-memalign-in-arm-gcc + // https://electronics.stackexchange.com/questions/467382/e2-studio-undefined-reference-to-posix-memalign/467753 + mem = memalign(alignment, size); + C4_CHECK(mem != nullptr || size == 0); #else + (void)size; + (void)alignment; + mem = nullptr; C4_NOT_IMPLEMENTED_MSG("need to implement an aligned allocation for this platform"); #endif C4_ASSERT_MSG((uintptr_t(mem) & (alignment-1)) == 0, "address %p is not aligned to %zu boundary", mem, alignment); @@ -17496,7 +19092,6 @@ void* MemoryResourceLinear::do_allocate(size_t sz, size_t alignment, void *hint) if(m_pos + sz > m_size) { C4_ERROR("out of memory"); - return nullptr; } void *mem = m_mem + m_pos; size_t space = m_size - m_pos; @@ -17511,7 +19106,6 @@ void* MemoryResourceLinear::do_allocate(size_t sz, size_t alignment, void *hint) else { C4_ERROR("could not align memory"); - mem = nullptr; } return mem; } @@ -17570,6 +19164,8 @@ void* MemoryResourceLinear::do_reallocate(void* ptr, size_t oldsz, size_t newsz, * * */ +C4_SUPPRESS_WARNING_GCC_CLANG_POP + } // namespace c4 @@ -17663,6 +19259,8 @@ void operator delete[](void *p, size_t, std::nothrow_t) namespace c4 { +C4_SUPPRESS_WARNING_GCC_CLANG_WITH_PUSH("-Wold-style-cast") + size_t decode_code_point(uint8_t *C4_RESTRICT buf, size_t buflen, const uint32_t code) { C4_UNUSED(buflen); @@ -17713,6 +19311,8 @@ substr decode_code_point(substr out, csubstr code_point) return out.first(ret); } +C4_SUPPRESS_WARNING_GCC_CLANG_POP + } // namespace c4 #endif /* C4CORE_SINGLE_HDR_DEFINE_NOW */ @@ -17741,10 
+19341,12 @@ substr decode_code_point(substr out, csubstr code_point) #ifdef __clang__ # pragma clang diagnostic push # pragma clang diagnostic ignored "-Wchar-subscripts" // array subscript is of type 'char' +# pragma clang diagnostic ignored "-Wold-style-cast" #elif defined(__GNUC__) # pragma GCC diagnostic push # pragma GCC diagnostic ignored "-Wchar-subscripts" # pragma GCC diagnostic ignored "-Wtype-limits" +# pragma GCC diagnostic ignored "-Wold-style-cast" #endif namespace c4 { @@ -17832,7 +19434,8 @@ void base64_test_tables() bool base64_valid(csubstr encoded) { - if(encoded.len % 4) return false; + if(encoded.len & 3u) // (encoded.len % 4u) + return false; for(const char c : encoded) { if(c < 0/* || c >= 128*/) @@ -17854,10 +19457,9 @@ size_t base64_encode(substr buf, cblob data) C4_XASSERT((char_idx) < sizeof(detail::base64_sextet_to_char_));\ c4append_(detail::base64_sextet_to_char_[(char_idx)]);\ } - size_t rem, pos = 0; constexpr const uint32_t sextet_mask = uint32_t(1 << 6) - 1; - const unsigned char *C4_RESTRICT d = (unsigned char *) data.buf; // cast to unsigned to avoid wrapping high-bits + const unsigned char *C4_RESTRICT d = (const unsigned char *) data.buf; // cast to unsigned to avoid wrapping high-bits for(rem = data.len; rem >= 3; rem -= 3, d += 3) { const uint32_t val = ((uint32_t(d[0]) << 16) | (uint32_t(d[1]) << 8) | (uint32_t(d[2]))); @@ -17899,9 +19501,8 @@ size_t base64_decode(csubstr encoded, blob data) C4_XASSERT(size_t(c) < sizeof(detail::base64_char_to_sextet_));\ val |= static_cast(detail::base64_char_to_sextet_[(c)]) << ((shift) * 6);\ } - C4_ASSERT(base64_valid(encoded)); - C4_CHECK(encoded.len % 4 == 0); + C4_CHECK((encoded.len & 3u) == 0); size_t wpos = 0; // the write position const char *C4_RESTRICT d = encoded.str; constexpr const uint32_t full_byte = 0xff; @@ -18232,9 +19833,11 @@ size_t base64_decode(csubstr encoded, blob data) #ifdef __clang__ # pragma clang diagnostic push # pragma clang diagnostic ignored 
"-Wformat-nonliteral" +# pragma clang diagnostic ignored "-Wold-style-cast" #elif defined(__GNUC__) # pragma GCC diagnostic push # pragma GCC diagnostic ignored "-Wformat-nonliteral" +# pragma GCC diagnostic ignored "-Wold-style-cast" #endif @@ -18244,6 +19847,7 @@ namespace c4 { static error_flags s_error_flags = ON_ERROR_DEFAULTS; static error_callback_type s_error_callback = nullptr; + //----------------------------------------------------------------------------- error_flags get_error_flags() @@ -18265,6 +19869,7 @@ void set_error_callback(error_callback_type cb) s_error_callback = cb; } + //----------------------------------------------------------------------------- void handle_error(srcloc where, const char *fmt, ...) @@ -18297,23 +19902,24 @@ void handle_error(srcloc where, const char *fmt, ...) { if(s_error_callback) { - s_error_callback(buf, msglen/*ss.c_strp(), ss.tellp()*/); + s_error_callback(buf, msglen); } } - if(s_error_flags & ON_ERROR_ABORT) + if(s_error_flags & ON_ERROR_THROW) { - abort(); +#if defined(C4_EXCEPTIONS_ENABLED) && defined(C4_ERROR_THROWS_EXCEPTION) + throw std::runtime_error(buf); +#endif } - if(s_error_flags & ON_ERROR_THROW) + if(s_error_flags & ON_ERROR_ABORT) { -#if defined(C4_EXCEPTIONS_ENABLED) && defined(C4_ERROR_THROWS_EXCEPTION) - throw Exception(buf); -#else abort(); -#endif } + + abort(); // abort anyway, in case nothing was set + C4_UNREACHABLE_AFTER_ERR(); } //----------------------------------------------------------------------------- @@ -18321,20 +19927,19 @@ void handle_error(srcloc where, const char *fmt, ...) void handle_warning(srcloc where, const char *fmt, ...) 
{ va_list args; - char buf[1024]; //sstream ss; + char buf[1024]; va_start(args, fmt); vsnprintf(buf, sizeof(buf), fmt, args); va_end(args); C4_LOGF_WARN("\n"); #if defined(C4_ERROR_SHOWS_FILELINE) && defined(C4_ERROR_SHOWS_FUNC) - C4_LOGF_WARN("%s:%d: WARNING: %s\n", where.file, where.line, buf/*ss.c_strp()*/); + C4_LOGF_WARN("%s:%d: WARNING: %s\n", where.file, where.line, buf); C4_LOGF_WARN("%s:%d: WARNING: here: %s\n", where.file, where.line, where.func); #elif defined(C4_ERROR_SHOWS_FILELINE) - C4_LOGF_WARN("%s:%d: WARNING: %s\n", where.file, where.line, buf/*ss.c_strp()*/); + C4_LOGF_WARN("%s:%d: WARNING: %s\n", where.file, where.line, buf); #elif ! defined(C4_ERROR_SHOWS_FUNC) - C4_LOGF_WARN("WARNING: %s\n", buf/*ss.c_strp()*/); + C4_LOGF_WARN("WARNING: %s\n", buf); #endif - //c4::log.flush(); } //----------------------------------------------------------------------------- @@ -18346,33 +19951,38 @@ bool is_debugger_attached() if(first_call) { first_call = false; + C4_SUPPRESS_WARNING_GCC_PUSH + #if defined(__GNUC__) && __GNUC__ > 9 + C4_SUPPRESS_WARNING_GCC("-Wanalyzer-fd-leak") + #endif //! @see http://stackoverflow.com/questions/3596781/how-to-detect-if-the-current-process-is-being-run-by-gdb //! 
(this answer: http://stackoverflow.com/a/24969863/3968589 ) char buf[1024] = ""; - int status_fd = open("/proc/self/status", O_RDONLY); if (status_fd == -1) { return 0; } - - ssize_t num_read = ::read(status_fd, buf, sizeof(buf)); - - if (num_read > 0) + else { - static const char TracerPid[] = "TracerPid:"; - char *tracer_pid; - - if(num_read < 1024) + ssize_t num_read = ::read(status_fd, buf, sizeof(buf)); + if (num_read > 0) { - buf[num_read] = 0; - } - tracer_pid = strstr(buf, TracerPid); - if (tracer_pid) - { - first_call_result = !!::atoi(tracer_pid + sizeof(TracerPid) - 1); + static const char TracerPid[] = "TracerPid:"; + char *tracer_pid; + if(num_read < 1024) + { + buf[num_read] = 0; + } + tracer_pid = strstr(buf, TracerPid); + if (tracer_pid) + { + first_call_result = !!::atoi(tracer_pid + sizeof(TracerPid) - 1); + } } + close(status_fd); } + C4_SUPPRESS_WARNING_GCC_POP } return first_call_result; #elif defined(C4_PS4) @@ -18406,6 +20016,7 @@ bool is_debugger_attached() size = sizeof(info); junk = sysctl(mib, sizeof(mib) / sizeof(*mib), &info, &size, NULL, 0); assert(junk == 0); + (void)junk; // We're being debugged if the P_TRACED flag is set. 
return ((info.kp_proc.p_flag & P_TRACED) != 0); @@ -18469,16 +20080,58 @@ bool is_debugger_attached() //******************************************************************************** //-------------------------------------------------------------------------------- -// src/c4/yml/common.hpp -// https://github.com/biojppm/rapidyaml/src/c4/yml/common.hpp +// src/c4/yml/fwd.hpp +// https://github.com/biojppm/rapidyaml/src/c4/yml/fwd.hpp //-------------------------------------------------------------------------------- //******************************************************************************** -#ifndef _C4_YML_COMMON_HPP_ -#define _C4_YML_COMMON_HPP_ +#ifndef _C4_YML_FWD_HPP_ +#define _C4_YML_FWD_HPP_ + +/** @file fwd.hpp forward declarations */ + +namespace c4 { +namespace yml { + +struct NodeScalar; +struct NodeInit; +struct NodeData; +struct NodeType; +class NodeRef; +class ConstNodeRef; +class Tree; +struct ReferenceResolver; +template class ParseEngine; +struct EventHandlerTree; +using Parser = ParseEngine; + +} // namespace c4 +} // namespace yml + +#endif /* _C4_YML_FWD_HPP_ */ + + +// (end https://github.com/biojppm/rapidyaml/src/c4/yml/fwd.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/yml/version.hpp +// https://github.com/biojppm/rapidyaml/src/c4/yml/version.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4_YML_VERSION_HPP_ +#define _C4_YML_VERSION_HPP_ + +/** @file version.hpp */ + +#define RYML_VERSION "0.7.2" +#define RYML_VERSION_MAJOR 0 +#define RYML_VERSION_MINOR 7 +#define RYML_VERSION_PATCH 2 -//included above: -//#include // amalgamate: removed include of // https://github.com/biojppm/rapidyaml/src/c4/substr.hpp //#include @@ -18494,64 +20147,280 @@ bool 
is_debugger_attached() #endif /* C4_YML_EXPORT_HPP_ */ +namespace c4 { +namespace yml { -#ifndef RYML_USE_ASSERT -# define RYML_USE_ASSERT C4_USE_ASSERT -#endif +RYML_EXPORT csubstr version(); +RYML_EXPORT int version_major(); +RYML_EXPORT int version_minor(); +RYML_EXPORT int version_patch(); +} // namespace yml +} // namespace c4 -#if RYML_USE_ASSERT -# define RYML_ASSERT(cond) RYML_CHECK(cond) -# define RYML_ASSERT_MSG(cond, msg) RYML_CHECK_MSG(cond, msg) -#else -# define RYML_ASSERT(cond) -# define RYML_ASSERT_MSG(cond, msg) -#endif +#endif /* _C4_YML_VERSION_HPP_ */ -#if defined(NDEBUG) || defined(C4_NO_DEBUG_BREAK) -# define RYML_DEBUG_BREAK() +// (end https://github.com/biojppm/rapidyaml/src/c4/yml/version.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/yml/common.hpp +// https://github.com/biojppm/rapidyaml/src/c4/yml/common.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4_YML_COMMON_HPP_ +#define _C4_YML_COMMON_HPP_ + +/** @file common.hpp Common utilities and infrastructure used by ryml. 
*/ + +//included above: +//#include +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/substr.hpp +//#include +#if !defined(C4_SUBSTR_HPP_) && !defined(_C4_SUBSTR_HPP_) +#error "amalgamate: file c4/substr.hpp must have been included at this point" +#endif /* C4_SUBSTR_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/dump.hpp +//#include +#if !defined(C4_DUMP_HPP_) && !defined(_C4_DUMP_HPP_) +#error "amalgamate: file c4/dump.hpp must have been included at this point" +#endif /* C4_DUMP_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/export.hpp +//#include +#if !defined(C4_YML_EXPORT_HPP_) && !defined(_C4_YML_EXPORT_HPP_) +#error "amalgamate: file c4/yml/export.hpp must have been included at this point" +#endif /* C4_YML_EXPORT_HPP_ */ + + +#if defined(C4_MSVC) || defined(C4_MINGW) +//included above: +//#include #else -# define RYML_DEBUG_BREAK() \ - { \ - if(c4::get_error_flags() & c4::ON_ERROR_DEBUGBREAK) \ - { \ - C4_DEBUG_BREAK(); \ - } \ - } +#include +#endif + + + +//----------------------------------------------------------------------------- + +#ifndef RYML_ERRMSG_SIZE +/// size for the error message buffer +#define RYML_ERRMSG_SIZE (1024) +#endif + +#ifndef RYML_LOGBUF_SIZE +/// size for the buffer used to format individual values to string +/// while preparing an error message. This is only used for formatting +/// individual values in the message; final messages will be larger +/// than this value (see @ref RYML_ERRMSG_SIZE). This is also used for +/// the detailed debug log messages when RYML_DBG is defined. +#define RYML_LOGBUF_SIZE (256) +#endif + +#ifndef RYML_LOGBUF_SIZE_MAX +/// size for the fallback larger log buffer. When @ref +/// RYML_LOGBUF_SIZE is not large enough to convert a value to string, +/// then temporary stack memory is allocated up to +/// RYML_LOGBUF_SIZE_MAX. 
This limit is in place to prevent a stack +/// overflow. If the printed value requires more than +/// RYML_LOGBUF_SIZE_MAX, the value is silently skipped. +#define RYML_LOGBUF_SIZE_MAX (1024) +#endif + +#ifndef RYML_LOCATIONS_SMALL_THRESHOLD +/// threshold at which a location search will revert from linear to +/// binary search. +#define RYML_LOCATIONS_SMALL_THRESHOLD (30) +#endif + + +//----------------------------------------------------------------------------- +// Specify groups to have a predefined topic order in doxygen: + +/** @defgroup doc_quickstart Quickstart + * + * Example code for every feature. + */ + +/** @defgroup doc_parse Parse utilities + * @see sample::sample_parse_in_place + * @see sample::sample_parse_in_arena + * @see sample::sample_parse_file + * @see sample::sample_parse_reuse_tree + * @see sample::sample_parse_reuse_parser + * @see sample::sample_parse_reuse_tree_and_parser + * @see sample::sample_location_tracking + */ + +/** @defgroup doc_emit Emit utilities + * + * Utilities to emit YAML and JSON, either to a memory buffer or to a + * file or ostream-like class. + * + * @see sample::sample_emit_to_container + * @see sample::sample_emit_to_stream + * @see sample::sample_emit_to_file + * @see sample::sample_emit_nested_node + * @see sample::sample_emit_style + */ + +/** @defgroup doc_node_type Node types + */ + +/** @defgroup doc_tree Tree utilities + * @see sample::sample_quick_overview + * @see sample::sample_iterate_trees + * @see sample::sample_create_trees + * @see sample::sample_tree_arena + * + * @see sample::sample_static_trees + * @see sample::sample_location_tracking + * + * @see sample::sample_docs + * @see sample::sample_anchors_and_aliases + * @see sample::sample_tags + */ + +/** @defgroup doc_node_classes Node classes + * + * High-level node classes. 
+ * + * @see sample::sample_quick_overview + * @see sample::sample_iterate_trees + * @see sample::sample_create_trees + * @see sample::sample_tree_arena + */ + +/** @defgroup doc_callbacks Callbacks for errors and allocation + * + * Functions called by ryml to allocate/free memory and to report + * errors. + * + * @see sample::sample_error_handler + * @see sample::sample_global_allocator + * @see sample::sample_per_tree_allocator + */ + +/** @defgroup doc_serialization Serialization/deserialization + * + * Contains information on how to serialize and deserialize + * fundamental types, user scalar types, user container types and + * interop with std scalar/container types. + * + */ + +/** @defgroup doc_ref_utils Anchor/Reference utilities + * + * @see sample::sample_anchors_and_aliases + * */ + +/** @defgroup doc_tag_utils Tag utilities + * @see sample::sample_tags + */ + +/** @defgroup doc_preprocessors Preprocessors + * + * Functions for preprocessing YAML prior to parsing. + */ + + +//----------------------------------------------------------------------------- + +// document macros for doxygen +#ifdef __DOXYGEN__ // defined in Doxyfile::PREDEFINED + +/** define this macro with a boolean value to enable/disable + * assertions to check preconditions and assumptions throughout the + * codebase; this causes a slowdown of the code, and larger code + * size. By default, this macro is defined unless NDEBUG is defined + * (see C4_USE_ASSERT); as a result, by default this macro is truthy + * only in debug builds. */ +# define RYML_USE_ASSERT + +/** (Undefined by default) Define this macro to disable ryml's default + * implementation of the callback functions; see @ref c4::yml::Callbacks */ +# define RYML_NO_DEFAULT_CALLBACKS + +/** (Undefined by default) When this macro is defined (and + * @ref RYML_NO_DEFAULT_CALLBACKS is not defined), the default error + * handler will throw C++ exceptions of type `std::runtime_error`. 
*/ +# define RYML_DEFAULT_CALLBACK_USES_EXCEPTIONS + +/** Conditionally expands to `noexcept` when @ref RYML_USE_ASSERT is 0 and + * is empty otherwise. The user is unable to override this macro. */ +# define RYML_NOEXCEPT + +#endif + + +//----------------------------------------------------------------------------- + + +/** @cond dev*/ +#ifndef RYML_USE_ASSERT +# define RYML_USE_ASSERT C4_USE_ASSERT +#endif + +#if RYML_USE_ASSERT +# define RYML_ASSERT(cond) RYML_CHECK(cond) +# define RYML_ASSERT_MSG(cond, msg) RYML_CHECK_MSG(cond, msg) +# define _RYML_CB_ASSERT(cb, cond) _RYML_CB_CHECK((cb), (cond)) +# define _RYML_CB_ASSERT_(cb, cond, loc) _RYML_CB_CHECK((cb), (cond), (loc)) +# define RYML_NOEXCEPT +#else +# define RYML_ASSERT(cond) +# define RYML_ASSERT_MSG(cond, msg) +# define _RYML_CB_ASSERT(cb, cond) +# define _RYML_CB_ASSERT_(cb, cond, loc) +# define RYML_NOEXCEPT noexcept #endif +#define RYML_DEPRECATED(msg) C4_DEPRECATED(msg) #define RYML_CHECK(cond) \ do { \ - if(!(cond)) \ + if(C4_UNLIKELY(!(cond))) \ { \ - RYML_DEBUG_BREAK() \ + RYML_DEBUG_BREAK(); \ c4::yml::error("check failed: " #cond, c4::yml::Location(__FILE__, __LINE__, 0)); \ + C4_UNREACHABLE_AFTER_ERR(); \ } \ } while(0) #define RYML_CHECK_MSG(cond, msg) \ do \ { \ - if(!(cond)) \ + if(C4_UNLIKELY(!(cond))) \ { \ - RYML_DEBUG_BREAK() \ + RYML_DEBUG_BREAK(); \ c4::yml::error(msg ": check failed: " #cond, c4::yml::Location(__FILE__, __LINE__, 0)); \ + C4_UNREACHABLE_AFTER_ERR(); \ } \ } while(0) - -#if C4_CPP >= 14 -# define RYML_DEPRECATED(msg) [[deprecated(msg)]] +#if defined(RYML_DBG) && !defined(NDEBUG) && !defined(C4_NO_DEBUG_BREAK) +# define RYML_DEBUG_BREAK() \ + do { \ + if(c4::get_error_flags() & c4::ON_ERROR_DEBUGBREAK) \ + { \ + C4_DEBUG_BREAK(); \ + } \ + } while(0) #else -# if defined(_MSC_VER) -# define RYML_DEPRECATED(msg) __declspec(deprecated(msg)) -# else // defined(__GNUC__) || defined(__clang__) -# define RYML_DEPRECATED(msg) __attribute__((deprecated(msg))) -# endif +# define 
RYML_DEBUG_BREAK() #endif +/** @endcond */ + //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- @@ -18560,11 +20429,35 @@ bool is_debugger_attached() namespace c4 { namespace yml { -enum : size_t { - /** a null position */ - npos = size_t(-1), +C4_SUPPRESS_WARNING_GCC_CLANG_WITH_PUSH("-Wold-style-cast") + + +#ifndef RYML_ID_TYPE +/** The type of a node id in the YAML tree. In the future, the default + * will likely change to int32_t, which was observed to be faster. + * @see id_type */ +#define RYML_ID_TYPE size_t +#endif + + +/** The type of a node id in the YAML tree; to override the default + * type, define the macro @ref RYML_ID_TYPE to a suitable integer + * type. */ +using id_type = RYML_ID_TYPE; +static_assert(std::is_integral::value, "id_type must be an integer type"); + + +C4_SUPPRESS_WARNING_GCC_WITH_PUSH("-Wuseless-cast") +enum : id_type { /** an index to none */ - NONE = size_t(-1) + NONE = id_type(-1), +}; +C4_SUPPRESS_WARNING_GCC_CLANG_POP + + +enum : size_t { + /** a null string position */ + npos = size_t(-1) }; @@ -18582,72 +20475,96 @@ struct RYML_EXPORT LineCol //! column size_t col; - LineCol() : offset(), line(), col() {} + LineCol() = default; //! construct from line and column LineCol(size_t l, size_t c) : offset(0), line(l), col(c) {} //! construct from offset, line and column LineCol(size_t o, size_t l, size_t c) : offset(o), line(l), col(c) {} }; +static_assert(std::is_trivial::value, "LineCol not trivial"); +static_assert(std::is_standard_layout::value, "Location not trivial"); //! a source file position -struct RYML_EXPORT Location : public LineCol +struct RYML_EXPORT Location { + //! number of bytes from the beginning of the source buffer + size_t offset; + //! line + size_t line; + //! column + size_t col; + //! 
file name csubstr name; - operator bool () const { return !name.empty() || line != 0 || offset != 0; } + operator bool () const { return !name.empty() || line != 0 || offset != 0 || col != 0; } + operator LineCol const& () const { return reinterpret_cast(*this); } - Location() : LineCol(), name() {} - Location( size_t l, size_t c) : LineCol{ l, c}, name( ) {} - Location( csubstr n, size_t l, size_t c) : LineCol{ l, c}, name(n) {} - Location( csubstr n, size_t b, size_t l, size_t c) : LineCol{b, l, c}, name(n) {} - Location(const char *n, size_t l, size_t c) : LineCol{ l, c}, name(to_csubstr(n)) {} - Location(const char *n, size_t b, size_t l, size_t c) : LineCol{b, l, c}, name(to_csubstr(n)) {} + Location() = default; + Location( size_t l, size_t c) : offset( ), line(l), col(c), name( ) {} + Location( size_t b, size_t l, size_t c) : offset(b), line(l), col(c), name( ) {} + Location( csubstr n, size_t l, size_t c) : offset( ), line(l), col(c), name(n) {} + Location( csubstr n, size_t b, size_t l, size_t c) : offset(b), line(l), col(c), name(n) {} + Location(const char *n, size_t l, size_t c) : offset( ), line(l), col(c), name(to_csubstr(n)) {} + Location(const char *n, size_t b, size_t l, size_t c) : offset(b), line(l), col(c), name(to_csubstr(n)) {} }; +static_assert(std::is_standard_layout::value, "Location not trivial"); //----------------------------------------------------------------------------- -/** the type of the function used to report errors. This function must - * interrupt execution, either by raising an exception or calling - * std::abort(). +/** @addtogroup doc_callbacks * - * @warning the error callback must never return: it must either abort - * or throw an exception. Otherwise, the parser will enter into an - * infinite loop, or the program may crash. 
*/ -using pfn_error = void (*)(const char* msg, size_t msg_len, Location location, void *user_data); -/** the type of the function used to allocate memory */ -using pfn_allocate = void* (*)(size_t len, void* hint, void *user_data); -/** the type of the function used to free memory */ -using pfn_free = void (*)(void* mem, size_t size, void *user_data); + * @{ */ -/** trigger an error: call the current error callback. */ -RYML_EXPORT void error(const char *msg, size_t msg_len, Location loc); -/** @overload error */ -inline void error(const char *msg, size_t msg_len) -{ - error(msg, msg_len, Location{}); -} -/** @overload error */ -template -inline void error(const char (&msg)[N], Location loc) -{ - error(msg, N-1, loc); -} -/** @overload error */ -template -inline void error(const char (&msg)[N]) -{ - error(msg, N-1, Location{}); -} +struct Callbacks; -//----------------------------------------------------------------------------- -/** a c-style callbacks class +/** set the global callbacks for the library; after a call to this + * function, these callbacks will be used by newly created objects + * (unless they are copying older objects with different + * callbacks). If @ref RYML_NO_DEFAULT_CALLBACKS is defined, it is + * mandatory to call this function prior to using any other library + * facility. + * + * @warning This function is NOT thread-safe. + * + * @warning the error callback must never return: see @ref pfn_error + * for more details */ +RYML_EXPORT void set_callbacks(Callbacks const& c); + +/** get the global callbacks + * @warning This function is not thread-safe. */ +RYML_EXPORT Callbacks const& get_callbacks(); + +/** set the global callbacks back to their defaults + * @warning This function is not thread-safe. */ +RYML_EXPORT void reset_callbacks(); + + +/** the type of the function used to report errors * - * @warning the error callback must never return: it must either abort - * or throw an exception.
Otherwise, the parser will enter into an - * infinite loop, or the program may crash. */ + * @warning When given by the user, this function MUST interrupt + * execution, typically by either throwing an exception, or using + * `std::longjmp()` ([see + * documentation](https://en.cppreference.com/w/cpp/utility/program/setjmp)) + * or by calling `std::abort()`. If the function returned, the parser + * would enter into an infinite loop, or the program may crash. */ +using pfn_error = void (*) (const char* msg, size_t msg_len, Location location, void *user_data); + + +/** the type of the function used to allocate memory; ryml will only + * allocate memory through this callback. */ +using pfn_allocate = void* (*)(size_t len, void* hint, void *user_data); + + +/** the type of the function used to free memory; ryml will only free + * memory through this callback. */ +using pfn_free = void (*)(void* mem, size_t size, void *user_data); + + +/** a c-style callbacks class. Can be used globally by the library + * and/or locally by @ref Tree and @ref Parser objects. */ struct RYML_EXPORT Callbacks { void * m_user_data; @@ -18655,8 +20572,32 @@ struct RYML_EXPORT Callbacks pfn_free m_free; pfn_error m_error; + /** Construct an object with the default callbacks. If + * @ref RYML_NO_DEFAULT_CALLBACKS is defined, the object will have null + * members.*/ Callbacks(); - Callbacks(void *user_data, pfn_allocate alloc, pfn_free free, pfn_error error_); + + /** Construct an object with the given callbacks. + * + * @param user_data Data to be forwarded in every call to a callback. + * + * @param alloc A pointer to an allocate function. Unless + * @ref RYML_NO_DEFAULT_CALLBACKS is defined, when this + * parameter is null, will fall back to ryml's default + * alloc implementation. + * + * @param free A pointer to a free function. Unless + * @ref RYML_NO_DEFAULT_CALLBACKS is defined, when this + * parameter is null, will fall back to ryml's default free + * implementation. 
+ * + * @param error A pointer to an error function, which must never + * return (see @ref pfn_error). Unless + * @ref RYML_NO_DEFAULT_CALLBACKS is defined, when this + * parameter is null, will fall back to ryml's default + * error implementation. + */ + Callbacks(void *user_data, pfn_allocate alloc, pfn_free free, pfn_error error); bool operator!= (Callbacks const& that) const { return !operator==(that); } bool operator== (Callbacks const& that) const @@ -18668,42 +20609,60 @@ struct RYML_EXPORT Callbacks } }; -/** set the global callbacks. - * - * @warning the error callback must never return: it must either abort - * or throw an exception. Otherwise, the parser will enter into an - * infinite loop, or the program may crash. */ -RYML_EXPORT void set_callbacks(Callbacks const& c); -/// get the global callbacks -RYML_EXPORT Callbacks const& get_callbacks(); -/// set the global callbacks back to their defaults -RYML_EXPORT void reset_callbacks(); + +/** @} */ + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- /// @cond dev + +// BEWARE! 
MSVC requires that [[noreturn]] appears before RYML_EXPORT +[[noreturn]] RYML_EXPORT void error(Callbacks const& cb, const char *msg, size_t msg_len, Location loc); +[[noreturn]] RYML_EXPORT void error(const char *msg, size_t msg_len, Location loc); + +[[noreturn]] inline void error(const char *msg, size_t msg_len) +{ + error(msg, msg_len, Location{}); +} +template +[[noreturn]] inline void error(const char (&msg)[N], Location loc) +{ + error(msg, N-1, loc); +} +template +[[noreturn]] inline void error(const char (&msg)[N]) +{ + error(msg, N-1, Location{}); +} + #define _RYML_CB_ERR(cb, msg_literal) \ + _RYML_CB_ERR_(cb, msg_literal, c4::yml::Location(__FILE__, 0, __LINE__, 0)) +#define _RYML_CB_CHECK(cb, cond) \ + _RYML_CB_CHECK_(cb, cond, c4::yml::Location(__FILE__, 0, __LINE__, 0)) +#define _RYML_CB_ERR_(cb, msg_literal, loc) \ do \ { \ const char msg[] = msg_literal; \ - RYML_DEBUG_BREAK() \ - (cb).m_error(msg, sizeof(msg), c4::yml::Location(__FILE__, 0, __LINE__, 0), (cb).m_user_data); \ + RYML_DEBUG_BREAK(); \ + c4::yml::error((cb), msg, sizeof(msg)-1, loc); \ + C4_UNREACHABLE_AFTER_ERR(); \ } while(0) -#define _RYML_CB_CHECK(cb, cond) \ +#define _RYML_CB_CHECK_(cb, cond, loc) \ do \ { \ - if(!(cond)) \ + if(C4_UNLIKELY(!(cond))) \ { \ const char msg[] = "check failed: " #cond; \ - RYML_DEBUG_BREAK() \ - (cb).m_error(msg, sizeof(msg), c4::yml::Location(__FILE__, 0, __LINE__, 0), (cb).m_user_data); \ + RYML_DEBUG_BREAK(); \ + c4::yml::error((cb), msg, sizeof(msg)-1, loc); \ + C4_UNREACHABLE_AFTER_ERR(); \ } \ } while(0) -#ifdef RYML_USE_ASSERT -#define _RYML_CB_ASSERT(cb, cond) _RYML_CB_CHECK((cb), (cond)) -#else -#define _RYML_CB_ASSERT(cb, cond) do {} while(0) -#endif #define _RYML_CB_ALLOC_HINT(cb, T, num, hint) (T*) (cb).m_allocate((num) * sizeof(T), (hint), (cb).m_user_data) -#define _RYML_CB_ALLOC(cb, T, num) _RYML_CB_ALLOC_HINT((cb), (T), (num), nullptr) +#define _RYML_CB_ALLOC(cb, T, num) _RYML_CB_ALLOC_HINT((cb), T, (num), nullptr) #define 
_RYML_CB_FREE(cb, buf, T, num) \ do { \ (cb).m_free((buf), (num) * sizeof(T), (cb).m_user_data); \ @@ -18712,7 +20671,50 @@ do \ +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +typedef enum { + BLOCK_LITERAL, //!< keep newlines (|) + BLOCK_FOLD //!< replace newline with single space (>) +} BlockStyle_e; + +typedef enum { + CHOMP_CLIP, //!< single newline at end (default) + CHOMP_STRIP, //!< no newline at end (-) + CHOMP_KEEP //!< all newlines from end (+) +} BlockChomp_e; + + +/** Abstracts the fact that a scalar filter result may not fit in the + * intended memory. */ +struct FilterResult +{ + C4_ALWAYS_INLINE bool valid() const noexcept { return str.str != nullptr; } + C4_ALWAYS_INLINE size_t required_len() const noexcept { return str.len; } + C4_ALWAYS_INLINE csubstr get() { RYML_ASSERT(valid()); return str; } + csubstr str; +}; +/** Abstracts the fact that a scalar filter result may not fit in the + * intended memory. */ +struct FilterResultExtending +{ + C4_ALWAYS_INLINE bool valid() const noexcept { return str.str != nullptr; } + C4_ALWAYS_INLINE size_t required_len() const noexcept { return reqlen; } + C4_ALWAYS_INLINE csubstr get() { RYML_ASSERT(valid()); return str; } + csubstr str; + size_t reqlen; +}; + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + + namespace detail { +// is there a better way to do this? 
template struct _charconstant_t : public std::conditional::value, @@ -18728,23 +20730,29 @@ struct _SubstrWriter { substr buf; size_t pos; - _SubstrWriter(substr buf_, size_t pos_=0) : buf(buf_), pos(pos_) {} + _SubstrWriter(substr buf_, size_t pos_=0) : buf(buf_), pos(pos_) { C4_ASSERT(buf.str); } void append(csubstr s) { C4_ASSERT(!s.overlaps(buf)); - if(pos + s.len <= buf.len) + C4_ASSERT(s.str || !s.len); + if(s.len && pos + s.len <= buf.len) + { + C4_ASSERT(s.str); memcpy(buf.str + pos, s.str, s.len); + } pos += s.len; } void append(char c) { + C4_ASSERT(buf.str); if(pos < buf.len) buf.str[pos] = c; ++pos; } void append_n(char c, size_t numtimes) { - if(pos + numtimes < buf.len) + C4_ASSERT(buf.str); + if(numtimes && pos + numtimes < buf.len) memset(buf.str + pos, c, numtimes); pos += numtimes; } @@ -18759,42 +20767,91 @@ struct _SubstrWriter }; } // namespace detail -/// @endcond - -} // namespace yml -} // namespace c4 -#endif /* _C4_YML_COMMON_HPP_ */ - - -// (end https://github.com/biojppm/rapidyaml/src/c4/yml/common.hpp) +namespace detail { +// dumpfn is a function abstracting prints to terminal (or to string). +template +C4_NO_INLINE void _dump(DumpFn &&dumpfn, csubstr fmt, Args&& ...args) +{ + DumpResults results; + // try writing everything: + { + // buffer for converting individual arguments. it is defined + // in a child scope to free it in case the buffer is too small + // for any of the arguments. + char writebuf[RYML_LOGBUF_SIZE]; + results = format_dump_resume(std::forward(dumpfn), writebuf, fmt, std::forward(args)...); + } + // if any of the arguments failed to fit the buffer, allocate a + // larger buffer (up to a limit) and resume writing. + // + // results.bufsize is set to the size of the largest element + // serialized. Eg int(1) will require 1 byte. + if(C4_UNLIKELY(results.bufsize > RYML_LOGBUF_SIZE)) + { + const size_t bufsize = results.bufsize <= RYML_LOGBUF_SIZE_MAX ? 
results.bufsize : RYML_LOGBUF_SIZE_MAX; + #ifdef C4_MSVC + substr largerbuf = {static_cast(_alloca(bufsize)), bufsize}; + #else + substr largerbuf = {static_cast(alloca(bufsize)), bufsize}; + #endif + results = format_dump_resume(std::forward(dumpfn), results, largerbuf, fmt, std::forward(args)...); + } +} +template +C4_NORETURN C4_NO_INLINE void _report_err(Callbacks const& C4_RESTRICT callbacks, csubstr fmt, Args const& C4_RESTRICT ...args) +{ + char errmsg[RYML_ERRMSG_SIZE] = {0}; + detail::_SubstrWriter writer(errmsg); + auto dumpfn = [&writer](csubstr s){ writer.append(s); }; + _dump(dumpfn, fmt, args...); + writer.append('\n'); + const size_t len = writer.pos < RYML_ERRMSG_SIZE ? writer.pos : RYML_ERRMSG_SIZE; + callbacks.m_error(errmsg, len, {}, callbacks.m_user_data); + C4_UNREACHABLE_AFTER_ERR(); +} +} // namespace detail + + +inline csubstr _c4prc(const char &C4_RESTRICT c) // pass by reference! +{ + switch(c) + { + case '\n': return csubstr("\\n"); + case '\t': return csubstr("\\t"); + case '\0': return csubstr("\\0"); + case '\r': return csubstr("\\r"); + case '\f': return csubstr("\\f"); + case '\b': return csubstr("\\b"); + case '\v': return csubstr("\\v"); + case '\a': return csubstr("\\a"); + default: return csubstr(&c, 1); + } +} + +/// @endcond + +C4_SUPPRESS_WARNING_GCC_POP + +} // namespace yml +} // namespace c4 + +#endif /* _C4_YML_COMMON_HPP_ */ + + +// (end https://github.com/biojppm/rapidyaml/src/c4/yml/common.hpp) //******************************************************************************** //-------------------------------------------------------------------------------- -// src/c4/yml/tree.hpp -// https://github.com/biojppm/rapidyaml/src/c4/yml/tree.hpp +// src/c4/yml/node_type.hpp +// https://github.com/biojppm/rapidyaml/src/c4/yml/node_type.hpp //-------------------------------------------------------------------------------- //******************************************************************************** -#ifndef 
_C4_YML_TREE_HPP_ -#define _C4_YML_TREE_HPP_ - - -// amalgamate: removed include of -// https://github.com/biojppm/rapidyaml/src/c4/error.hpp -//#include "c4/error.hpp" -#if !defined(C4_ERROR_HPP_) && !defined(_C4_ERROR_HPP_) -#error "amalgamate: file c4/error.hpp must have been included at this point" -#endif /* C4_ERROR_HPP_ */ - -// amalgamate: removed include of -// https://github.com/biojppm/rapidyaml/src/c4/types.hpp -//#include "c4/types.hpp" -#if !defined(C4_TYPES_HPP_) && !defined(_C4_TYPES_HPP_) -#error "amalgamate: file c4/types.hpp must have been included at this point" -#endif /* C4_TYPES_HPP_ */ +#ifndef C4_YML_NODE_TYPE_HPP_ +#define C4_YML_NODE_TYPE_HPP_ #ifndef _C4_YML_COMMON_HPP_ // amalgamate: removed include of @@ -18806,207 +20863,113 @@ struct _SubstrWriter #endif -// amalgamate: removed include of -// https://github.com/biojppm/rapidyaml/src/c4/charconv.hpp -//#include -#if !defined(C4_CHARCONV_HPP_) && !defined(_C4_CHARCONV_HPP_) -#error "amalgamate: file c4/charconv.hpp must have been included at this point" -#endif /* C4_CHARCONV_HPP_ */ - -//included above: -//#include -//included above: -//#include - - C4_SUPPRESS_WARNING_MSVC_PUSH -C4_SUPPRESS_WARNING_MSVC(4251) // needs to have dll-interface to be used by clients of struct -C4_SUPPRESS_WARNING_MSVC(4296) // expression is always 'boolean_value' C4_SUPPRESS_WARNING_GCC_CLANG_PUSH -C4_SUPPRESS_WARNING_GCC("-Wtype-limits") - +C4_SUPPRESS_WARNING_GCC_CLANG("-Wold-style-cast") namespace c4 { namespace yml { -struct NodeScalar; -struct NodeInit; -struct NodeData; -class NodeRef; -class ConstNodeRef; -class Tree; - - -/** encode a floating point value to a string. 
*/ -template -size_t to_chars_float(substr buf, T val) -{ - C4_SUPPRESS_WARNING_GCC_CLANG_WITH_PUSH("-Wfloat-equal"); - static_assert(std::is_floating_point::value, "must be floating point"); - if(C4_UNLIKELY(std::isnan(val))) - return to_chars(buf, csubstr(".nan")); - else if(C4_UNLIKELY(val == std::numeric_limits::infinity())) - return to_chars(buf, csubstr(".inf")); - else if(C4_UNLIKELY(val == -std::numeric_limits::infinity())) - return to_chars(buf, csubstr("-.inf")); - return to_chars(buf, val); - C4_SUPPRESS_WARNING_GCC_CLANG_POP -} - - -/** decode a floating point from string. Accepts special values: .nan, - * .inf, -.inf */ -template -bool from_chars_float(csubstr buf, T *C4_RESTRICT val) -{ - static_assert(std::is_floating_point::value, "must be floating point"); - if(C4_LIKELY(from_chars(buf, val))) - { - return true; - } - else if(C4_UNLIKELY(buf == ".nan" || buf == ".NaN" || buf == ".NAN")) - { - *val = std::numeric_limits::quiet_NaN(); - return true; - } - else if(C4_UNLIKELY(buf == ".inf" || buf == ".Inf" || buf == ".INF")) - { - *val = std::numeric_limits::infinity(); - return true; - } - else if(C4_UNLIKELY(buf == "-.inf" || buf == "-.Inf" || buf == "-.INF")) - { - *val = -std::numeric_limits::infinity(); - return true; - } - else - { - return false; - } -} - - -//----------------------------------------------------------------------------- -//----------------------------------------------------------------------------- -//----------------------------------------------------------------------------- - -/** the integral type necessary to cover all the bits marking node tags */ -using tag_bits = uint16_t; - -/** a bit mask for marking tags for types */ -typedef enum : tag_bits { - // container types - TAG_NONE = 0, - TAG_MAP = 1, /**< !!map Unordered set of key: value pairs without duplicates. @see https://yaml.org/type/map.html */ - TAG_OMAP = 2, /**< !!omap Ordered sequence of key: value pairs without duplicates. 
@see https://yaml.org/type/omap.html */ - TAG_PAIRS = 3, /**< !!pairs Ordered sequence of key: value pairs allowing duplicates. @see https://yaml.org/type/pairs.html */ - TAG_SET = 4, /**< !!set Unordered set of non-equal values. @see https://yaml.org/type/set.html */ - TAG_SEQ = 5, /**< !!seq Sequence of arbitrary values. @see https://yaml.org/type/seq.html */ - // scalar types - TAG_BINARY = 6, /**< !!binary A sequence of zero or more octets (8 bit values). @see https://yaml.org/type/binary.html */ - TAG_BOOL = 7, /**< !!bool Mathematical Booleans. @see https://yaml.org/type/bool.html */ - TAG_FLOAT = 8, /**< !!float Floating-point approximation to real numbers. https://yaml.org/type/float.html */ - TAG_INT = 9, /**< !!float Mathematical integers. https://yaml.org/type/int.html */ - TAG_MERGE = 10, /**< !!merge Specify one or more mapping to be merged with the current one. https://yaml.org/type/merge.html */ - TAG_NULL = 11, /**< !!null Devoid of value. https://yaml.org/type/null.html */ - TAG_STR = 12, /**< !!str A sequence of zero or more Unicode characters. https://yaml.org/type/str.html */ - TAG_TIMESTAMP = 13, /**< !!timestamp A point in time https://yaml.org/type/timestamp.html */ - TAG_VALUE = 14, /**< !!value Specify the default value of a mapping https://yaml.org/type/value.html */ - TAG_YAML = 15, /**< !!yaml Specify the default value of a mapping https://yaml.org/type/yaml.html */ -} YamlTag_e; - -YamlTag_e to_tag(csubstr tag); -csubstr from_tag(YamlTag_e tag); -csubstr from_tag_long(YamlTag_e tag); -csubstr normalize_tag(csubstr tag); -csubstr normalize_tag_long(csubstr tag); - -struct TagDirective -{ - /** Eg `!e!` in `%TAG !e! tag:example.com,2000:app/` */ - csubstr handle; - /** Eg `tag:example.com,2000:app/` in `%TAG !e! 
tag:example.com,2000:app/` */ - csubstr prefix; - /** The next node to which this tag directive applies */ - size_t next_node_id; -}; - -#ifndef RYML_MAX_TAG_DIRECTIVES -/** the maximum number of tag directives in a Tree */ -#define RYML_MAX_TAG_DIRECTIVES 4 -#endif - - +/** @addtogroup doc_node_type + * + * @{ + */ //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- -/** the integral type necessary to cover all the bits marking node types */ -using type_bits = uint64_t; +/** the integral type necessary to cover all the bits for NodeType_e */ +using type_bits = uint32_t; -/** a bit mask for marking node types */ +/** a bit mask for marking node types and styles */ typedef enum : type_bits { - // a convenience define, undefined below - #define c4bit(v) (type_bits(1) << v) - NOTYPE = 0, ///< no node type is set - VAL = c4bit(0), ///< a leaf node, has a (possibly empty) value - KEY = c4bit(1), ///< is member of a map, must have non-empty key - MAP = c4bit(2), ///< a map: a parent of keyvals - SEQ = c4bit(3), ///< a seq: a parent of vals - DOC = c4bit(4), ///< a document - STREAM = c4bit(5)|SEQ, ///< a stream: a seq of docs - KEYREF = c4bit(6), ///< a *reference: the key references an &anchor - VALREF = c4bit(7), ///< a *reference: the val references an &anchor - KEYANCH = c4bit(8), ///< the key has an &anchor - VALANCH = c4bit(9), ///< the val has an &anchor - KEYTAG = c4bit(10), ///< the key has an explicit tag/type - VALTAG = c4bit(11), ///< the val has an explicit tag/type - _TYMASK = c4bit(12)-1, // all the bits up to here - VALQUO = c4bit(12), ///< the val is quoted by '', "", > or | - KEYQUO = c4bit(13), ///< the key is quoted by '', "", > or | + #define __(v) (type_bits(1) << v) // a convenience define, undefined below + NOTYPE = 0, ///< no node type or style is set + KEY = 
__(0), ///< is member of a map, must have non-empty key + VAL = __(1), ///< a scalar: has a scalar (ie string) value, possibly empty. must be a leaf node, and cannot be MAP or SEQ + MAP = __(2), ///< a map: a parent of KEYVAL/KEYSEQ/KEYMAP nodes + SEQ = __(3), ///< a seq: a parent of VAL/SEQ/MAP nodes + DOC = __(4), ///< a document + STREAM = __(5)|SEQ, ///< a stream: a seq of docs + KEYREF = __(6), ///< a *reference: the key references an &anchor + VALREF = __(7), ///< a *reference: the val references an &anchor + KEYANCH = __(8), ///< the key has an &anchor + VALANCH = __(9), ///< the val has an &anchor + KEYTAG = __(10), ///< the key has a tag + VALTAG = __(11), ///< the val has a tag + _TYMASK = __(12)-1, ///< all the bits up to here + // + // unfiltered flags: + // + KEY_UNFILT = __(12), ///< the key scalar was left unfiltered; the parser was set not to filter. @see ParserOptions + VAL_UNFILT = __(13), ///< the val scalar was left unfiltered; the parser was set not to filter. @see ParserOptions + // + // style flags: + // + FLOW_SL = __(14), ///< mark container with single-line flow style (seqs as '[val1,val2], maps as '{key: val,key2: val2}') + FLOW_ML = __(15), ///< (NOT IMPLEMENTED, work in progress) mark container with multi-line flow style (seqs as '[\n val1,\n val2\n], maps as '{\n key: val,\n key2: val2\n}') + BLOCK = __(16), ///< mark container with block style (seqs as '- val\n', maps as 'key: val') + KEY_LITERAL = __(17), ///< mark key scalar as multiline, block literal | + VAL_LITERAL = __(18), ///< mark val scalar as multiline, block literal | + KEY_FOLDED = __(19), ///< mark key scalar as multiline, block folded > + VAL_FOLDED = __(20), ///< mark val scalar as multiline, block folded > + KEY_SQUO = __(21), ///< mark key scalar as single quoted ' + VAL_SQUO = __(22), ///< mark val scalar as single quoted ' + KEY_DQUO = __(23), ///< mark key scalar as double quoted " + VAL_DQUO = __(24), ///< mark val scalar as double quoted " + KEY_PLAIN = __(25), 
///< mark key scalar as plain scalar (unquoted, even when multiline) + VAL_PLAIN = __(26), ///< mark val scalar as plain scalar (unquoted, even when multiline) + // + // type combination masks: + // KEYVAL = KEY|VAL, KEYSEQ = KEY|SEQ, KEYMAP = KEY|MAP, DOCMAP = DOC|MAP, DOCSEQ = DOC|SEQ, DOCVAL = DOC|VAL, + // + // style combination masks: + // + SCALAR_LITERAL = KEY_LITERAL|VAL_LITERAL, + SCALAR_FOLDED = KEY_FOLDED|VAL_FOLDED, + SCALAR_SQUO = KEY_SQUO|VAL_SQUO, + SCALAR_DQUO = KEY_DQUO|VAL_DQUO, + SCALAR_PLAIN = KEY_PLAIN|VAL_PLAIN, + KEYQUO = KEY_SQUO|KEY_DQUO|KEY_FOLDED|KEY_LITERAL, ///< key style is one of ', ", > or | + VALQUO = VAL_SQUO|VAL_DQUO|VAL_FOLDED|VAL_LITERAL, ///< val style is one of ', ", > or | + KEY_STYLE = KEY_LITERAL|KEY_FOLDED|KEY_SQUO|KEY_DQUO|KEY_PLAIN, ///< mask of all the scalar styles for key (not container styles!) + VAL_STYLE = VAL_LITERAL|VAL_FOLDED|VAL_SQUO|VAL_DQUO|VAL_PLAIN, ///< mask of all the scalar styles for val (not container styles!) + SCALAR_STYLE = KEY_STYLE|VAL_STYLE, + CONTAINER_STYLE_FLOW = FLOW_SL|FLOW_ML, + CONTAINER_STYLE_BLOCK = BLOCK, + CONTAINER_STYLE = FLOW_SL|FLOW_ML|BLOCK, + STYLE = SCALAR_STYLE | CONTAINER_STYLE, + // + // mixed masks _KEYMASK = KEY | KEYQUO | KEYANCH | KEYREF | KEYTAG, _VALMASK = VAL | VALQUO | VALANCH | VALREF | VALTAG, - // these flags are from a work in progress and should not be used yet - _WIP_STYLE_FLOW_SL = c4bit(14), ///< mark container with single-line flow format (seqs as '[val1,val2], maps as '{key: val, key2: val2}') - _WIP_STYLE_FLOW_ML = c4bit(15), ///< mark container with multi-line flow format (seqs as '[val1,\nval2], maps as '{key: val,\nkey2: val2}') - _WIP_STYLE_BLOCK = c4bit(16), ///< mark container with block format (seqs as '- val\n', maps as 'key: val') - _WIP_KEY_LITERAL = c4bit(17), ///< mark key scalar as multiline, block literal | - _WIP_VAL_LITERAL = c4bit(18), ///< mark val scalar as multiline, block literal | - _WIP_KEY_FOLDED = c4bit(19), ///< mark key scalar as 
multiline, block folded > - _WIP_VAL_FOLDED = c4bit(20), ///< mark val scalar as multiline, block folded > - _WIP_KEY_SQUO = c4bit(21), ///< mark key scalar as single quoted - _WIP_VAL_SQUO = c4bit(22), ///< mark val scalar as single quoted - _WIP_KEY_DQUO = c4bit(23), ///< mark key scalar as double quoted - _WIP_VAL_DQUO = c4bit(24), ///< mark val scalar as double quoted - _WIP_KEY_PLAIN = c4bit(25), ///< mark key scalar as plain scalar (unquoted, even when multiline) - _WIP_VAL_PLAIN = c4bit(26), ///< mark val scalar as plain scalar (unquoted, even when multiline) - _WIP_KEY_STYLE = _WIP_KEY_LITERAL|_WIP_KEY_FOLDED|_WIP_KEY_SQUO|_WIP_KEY_DQUO|_WIP_KEY_PLAIN, - _WIP_VAL_STYLE = _WIP_VAL_LITERAL|_WIP_VAL_FOLDED|_WIP_VAL_SQUO|_WIP_VAL_DQUO|_WIP_VAL_PLAIN, - _WIP_KEY_FT_NL = c4bit(27), ///< features: mark key scalar as having \n in its contents - _WIP_VAL_FT_NL = c4bit(28), ///< features: mark val scalar as having \n in its contents - _WIP_KEY_FT_SQ = c4bit(29), ///< features: mark key scalar as having single quotes in its contents - _WIP_VAL_FT_SQ = c4bit(30), ///< features: mark val scalar as having single quotes in its contents - _WIP_KEY_FT_DQ = c4bit(31), ///< features: mark key scalar as having double quotes in its contents - _WIP_VAL_FT_DQ = c4bit(32), ///< features: mark val scalar as having double quotes in its contents - #undef c4bit + #undef __ } NodeType_e; +constexpr C4_ALWAYS_INLINE C4_CONST NodeType_e operator| (NodeType_e lhs, NodeType_e rhs) noexcept { return (NodeType_e)(((type_bits)lhs) | ((type_bits)rhs)); } +constexpr C4_ALWAYS_INLINE C4_CONST NodeType_e operator& (NodeType_e lhs, NodeType_e rhs) noexcept { return (NodeType_e)(((type_bits)lhs) & ((type_bits)rhs)); } +constexpr C4_ALWAYS_INLINE C4_CONST NodeType_e operator>> (NodeType_e bits, uint32_t n) noexcept { return (NodeType_e)(((type_bits)bits) >> n); } +constexpr C4_ALWAYS_INLINE C4_CONST NodeType_e operator<< (NodeType_e bits, uint32_t n) noexcept { return (NodeType_e)(((type_bits)bits) 
<< n); } +constexpr C4_ALWAYS_INLINE C4_CONST NodeType_e operator~ (NodeType_e bits) noexcept { return (NodeType_e)(~(type_bits)bits); } +C4_ALWAYS_INLINE NodeType_e& operator&= (NodeType_e &subject, NodeType_e bits) noexcept { subject = (NodeType_e)((type_bits)subject & (type_bits)bits); return subject; } +C4_ALWAYS_INLINE NodeType_e& operator|= (NodeType_e &subject, NodeType_e bits) noexcept { subject = (NodeType_e)((type_bits)subject | (type_bits)bits); return subject; } + //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- /** wraps a NodeType_e element with some syntactic sugar and predicates */ -struct NodeType +struct RYML_EXPORT NodeType { public: @@ -19014,92 +20977,106 @@ struct NodeType public: - C4_ALWAYS_INLINE NodeType() : type(NOTYPE) {} - C4_ALWAYS_INLINE NodeType(NodeType_e t) : type(t) {} - C4_ALWAYS_INLINE NodeType(type_bits t) : type((NodeType_e)t) {} + C4_ALWAYS_INLINE NodeType() noexcept : type(NOTYPE) {} + C4_ALWAYS_INLINE NodeType(NodeType_e t) noexcept : type(t) {} + C4_ALWAYS_INLINE NodeType(type_bits t) noexcept : type((NodeType_e)t) {} - C4_ALWAYS_INLINE const char *type_str() const { return type_str(type); } - static const char* type_str(NodeType_e t); + C4_ALWAYS_INLINE bool has_any(NodeType_e t) const noexcept { return (type & t) != 0u; } + C4_ALWAYS_INLINE bool has_all(NodeType_e t) const noexcept { return (type & t) == t; } + C4_ALWAYS_INLINE bool has_none(NodeType_e t) const noexcept { return (type & t) == 0; } - C4_ALWAYS_INLINE void set(NodeType_e t) { type = t; } - C4_ALWAYS_INLINE void set(type_bits t) { type = (NodeType_e)t; } + C4_ALWAYS_INLINE void set(NodeType_e t) noexcept { type = t; } + C4_ALWAYS_INLINE void add(NodeType_e t) noexcept { type = (type|t); } + C4_ALWAYS_INLINE void rem(NodeType_e t) noexcept { type = (type & ~t); } + 
C4_ALWAYS_INLINE void addrem(NodeType_e bits_to_add, NodeType_e bits_to_remove) noexcept { type |= bits_to_add; type &= ~bits_to_remove; } - C4_ALWAYS_INLINE void add(NodeType_e t) { type = (NodeType_e)(type|t); } - C4_ALWAYS_INLINE void add(type_bits t) { type = (NodeType_e)(type|t); } + C4_ALWAYS_INLINE void clear() noexcept { type = NOTYPE; } - C4_ALWAYS_INLINE void rem(NodeType_e t) { type = (NodeType_e)(type & ~t); } - C4_ALWAYS_INLINE void rem(type_bits t) { type = (NodeType_e)(type & ~t); } +public: - C4_ALWAYS_INLINE void clear() { type = NOTYPE; } + C4_ALWAYS_INLINE operator NodeType_e & C4_RESTRICT () noexcept { return type; } + C4_ALWAYS_INLINE operator NodeType_e const& C4_RESTRICT () const noexcept { return type; } public: - C4_ALWAYS_INLINE operator NodeType_e & C4_RESTRICT () { return type; } - C4_ALWAYS_INLINE operator NodeType_e const& C4_RESTRICT () const { return type; } + /** @name node type queries + * @{ */ - C4_ALWAYS_INLINE bool operator== (NodeType_e t) const { return type == t; } - C4_ALWAYS_INLINE bool operator!= (NodeType_e t) const { return type != t; } + /** return a preset string based on the node type */ + C4_ALWAYS_INLINE const char *type_str() const noexcept { return type_str(type); } + /** return a preset string based on the node type */ + static const char* type_str(NodeType_e t) noexcept; + + /** fill a string with the node type flags. If the string is small, returns {null, len} */ + C4_ALWAYS_INLINE csubstr type_str(substr buf) const noexcept { return type_str(buf, type); } + /** fill a string with the node type flags. 
If the string is small, returns {null, len} */ + static csubstr type_str(substr buf, NodeType_e t) noexcept; public: - #if defined(__clang__) - # pragma clang diagnostic push - # pragma clang diagnostic ignored "-Wnull-dereference" - #elif defined(__GNUC__) - # pragma GCC diagnostic push - # if __GNUC__ >= 6 - # pragma GCC diagnostic ignored "-Wnull-dereference" - # endif - #endif + /** @name node type queries + * @{ */ + + C4_ALWAYS_INLINE bool is_notype() const noexcept { return type == NOTYPE; } + C4_ALWAYS_INLINE bool is_stream() const noexcept { return ((type & STREAM) == STREAM) != 0; } + C4_ALWAYS_INLINE bool is_doc() const noexcept { return (type & DOC) != 0; } + C4_ALWAYS_INLINE bool is_container() const noexcept { return (type & (MAP|SEQ|STREAM)) != 0; } + C4_ALWAYS_INLINE bool is_map() const noexcept { return (type & MAP) != 0; } + C4_ALWAYS_INLINE bool is_seq() const noexcept { return (type & SEQ) != 0; } + C4_ALWAYS_INLINE bool has_key() const noexcept { return (type & KEY) != 0; } + C4_ALWAYS_INLINE bool has_val() const noexcept { return (type & VAL) != 0; } + C4_ALWAYS_INLINE bool is_val() const noexcept { return (type & KEYVAL) == VAL; } + C4_ALWAYS_INLINE bool is_keyval() const noexcept { return (type & KEYVAL) == KEYVAL; } + C4_ALWAYS_INLINE bool has_key_tag() const noexcept { return (type & KEYTAG) != 0; } + C4_ALWAYS_INLINE bool has_val_tag() const noexcept { return (type & VALTAG) != 0; } + C4_ALWAYS_INLINE bool has_key_anchor() const noexcept { return (type & KEYANCH) != 0; } + C4_ALWAYS_INLINE bool has_val_anchor() const noexcept { return (type & VALANCH) != 0; } + C4_ALWAYS_INLINE bool has_anchor() const noexcept { return (type & (KEYANCH|VALANCH)) != 0; } + C4_ALWAYS_INLINE bool is_key_ref() const noexcept { return (type & KEYREF) != 0; } + C4_ALWAYS_INLINE bool is_val_ref() const noexcept { return (type & VALREF) != 0; } + C4_ALWAYS_INLINE bool is_ref() const noexcept { return (type & (KEYREF|VALREF)) != 0; } + + C4_ALWAYS_INLINE bool 
is_key_unfiltered() const noexcept { return (type & (KEY_UNFILT)) != 0; } + C4_ALWAYS_INLINE bool is_val_unfiltered() const noexcept { return (type & (VAL_UNFILT)) != 0; } + + RYML_DEPRECATED("use has_key_anchor()") bool is_key_anchor() const noexcept { return has_key_anchor(); } + RYML_DEPRECATED("use has_val_anchor()") bool is_val_anchor() const noexcept { return has_val_anchor(); } + RYML_DEPRECATED("use has_anchor()") bool is_anchor() const noexcept { return has_anchor(); } + RYML_DEPRECATED("use has_anchor() || is_ref()") bool is_anchor_or_ref() const noexcept { return has_anchor() || is_ref(); } + /** @} */ - C4_ALWAYS_INLINE bool is_notype() const { return type == NOTYPE; } - C4_ALWAYS_INLINE bool is_stream() const { return ((type & STREAM) == STREAM) != 0; } - C4_ALWAYS_INLINE bool is_doc() const { return (type & DOC) != 0; } - C4_ALWAYS_INLINE bool is_container() const { return (type & (MAP|SEQ|STREAM)) != 0; } - C4_ALWAYS_INLINE bool is_map() const { return (type & MAP) != 0; } - C4_ALWAYS_INLINE bool is_seq() const { return (type & SEQ) != 0; } - C4_ALWAYS_INLINE bool has_key() const { return (type & KEY) != 0; } - C4_ALWAYS_INLINE bool has_val() const { return (type & VAL) != 0; } - C4_ALWAYS_INLINE bool is_val() const { return (type & KEYVAL) == VAL; } - C4_ALWAYS_INLINE bool is_keyval() const { return (type & KEYVAL) == KEYVAL; } - C4_ALWAYS_INLINE bool has_key_tag() const { return (type & (KEY|KEYTAG)) == (KEY|KEYTAG); } - C4_ALWAYS_INLINE bool has_val_tag() const { return ((type & VALTAG) && (type & (VAL|MAP|SEQ))); } - C4_ALWAYS_INLINE bool has_key_anchor() const { return (type & (KEY|KEYANCH)) == (KEY|KEYANCH); } - C4_ALWAYS_INLINE bool is_key_anchor() const { return (type & (KEY|KEYANCH)) == (KEY|KEYANCH); } - C4_ALWAYS_INLINE bool has_val_anchor() const { return (type & VALANCH) != 0 && (type & (VAL|SEQ|MAP)) != 0; } - C4_ALWAYS_INLINE bool is_val_anchor() const { return (type & VALANCH) != 0 && (type & (VAL|SEQ|MAP)) != 0; } - C4_ALWAYS_INLINE 
bool has_anchor() const { return (type & (KEYANCH|VALANCH)) != 0; } - C4_ALWAYS_INLINE bool is_anchor() const { return (type & (KEYANCH|VALANCH)) != 0; } - C4_ALWAYS_INLINE bool is_key_ref() const { return (type & KEYREF) != 0; } - C4_ALWAYS_INLINE bool is_val_ref() const { return (type & VALREF) != 0; } - C4_ALWAYS_INLINE bool is_ref() const { return (type & (KEYREF|VALREF)) != 0; } - C4_ALWAYS_INLINE bool is_anchor_or_ref() const { return (type & (KEYANCH|VALANCH|KEYREF|VALREF)) != 0; } - C4_ALWAYS_INLINE bool is_key_quoted() const { return (type & (KEY|KEYQUO)) == (KEY|KEYQUO); } - C4_ALWAYS_INLINE bool is_val_quoted() const { return (type & (VAL|VALQUO)) == (VAL|VALQUO); } - C4_ALWAYS_INLINE bool is_quoted() const { return (type & (KEY|KEYQUO)) == (KEY|KEYQUO) || (type & (VAL|VALQUO)) == (VAL|VALQUO); } - - // these predicates are a work in progress and subject to change. Don't use yet. - C4_ALWAYS_INLINE bool default_block() const { return (type & (_WIP_STYLE_BLOCK|_WIP_STYLE_FLOW_ML|_WIP_STYLE_FLOW_SL)) == 0; } - C4_ALWAYS_INLINE bool marked_block() const { return (type & (_WIP_STYLE_BLOCK)) != 0; } - C4_ALWAYS_INLINE bool marked_flow_sl() const { return (type & (_WIP_STYLE_FLOW_SL)) != 0; } - C4_ALWAYS_INLINE bool marked_flow_ml() const { return (type & (_WIP_STYLE_FLOW_ML)) != 0; } - C4_ALWAYS_INLINE bool marked_flow() const { return (type & (_WIP_STYLE_FLOW_ML|_WIP_STYLE_FLOW_SL)) != 0; } - C4_ALWAYS_INLINE bool key_marked_literal() const { return (type & (_WIP_KEY_LITERAL)) != 0; } - C4_ALWAYS_INLINE bool val_marked_literal() const { return (type & (_WIP_VAL_LITERAL)) != 0; } - C4_ALWAYS_INLINE bool key_marked_folded() const { return (type & (_WIP_KEY_FOLDED)) != 0; } - C4_ALWAYS_INLINE bool val_marked_folded() const { return (type & (_WIP_VAL_FOLDED)) != 0; } - C4_ALWAYS_INLINE bool key_marked_squo() const { return (type & (_WIP_KEY_SQUO)) != 0; } - C4_ALWAYS_INLINE bool val_marked_squo() const { return (type & (_WIP_VAL_SQUO)) != 0; } - C4_ALWAYS_INLINE 
bool key_marked_dquo() const { return (type & (_WIP_KEY_DQUO)) != 0; } - C4_ALWAYS_INLINE bool val_marked_dquo() const { return (type & (_WIP_VAL_DQUO)) != 0; } - C4_ALWAYS_INLINE bool key_marked_plain() const { return (type & (_WIP_KEY_PLAIN)) != 0; } - C4_ALWAYS_INLINE bool val_marked_plain() const { return (type & (_WIP_VAL_PLAIN)) != 0; } +public: - #if defined(__clang__) - # pragma clang diagnostic pop - #elif defined(__GNUC__) - # pragma GCC diagnostic pop - #endif + /** @name container+scalar style queries + * @{ */ + + C4_ALWAYS_INLINE bool is_container_styled() const noexcept { return (type & (CONTAINER_STYLE)) != 0; } + C4_ALWAYS_INLINE bool is_block() const noexcept { return (type & (BLOCK)) != 0; } + C4_ALWAYS_INLINE bool is_flow_sl() const noexcept { return (type & (FLOW_SL)) != 0; } + C4_ALWAYS_INLINE bool is_flow_ml() const noexcept { return (type & (FLOW_ML)) != 0; } + C4_ALWAYS_INLINE bool is_flow() const noexcept { return (type & (FLOW_ML|FLOW_SL)) != 0; } + + C4_ALWAYS_INLINE bool is_key_styled() const noexcept { return (type & (KEY_STYLE)) != 0; } + C4_ALWAYS_INLINE bool is_val_styled() const noexcept { return (type & (VAL_STYLE)) != 0; } + C4_ALWAYS_INLINE bool is_key_literal() const noexcept { return (type & (KEY_LITERAL)) != 0; } + C4_ALWAYS_INLINE bool is_val_literal() const noexcept { return (type & (VAL_LITERAL)) != 0; } + C4_ALWAYS_INLINE bool is_key_folded() const noexcept { return (type & (KEY_FOLDED)) != 0; } + C4_ALWAYS_INLINE bool is_val_folded() const noexcept { return (type & (VAL_FOLDED)) != 0; } + C4_ALWAYS_INLINE bool is_key_squo() const noexcept { return (type & (KEY_SQUO)) != 0; } + C4_ALWAYS_INLINE bool is_val_squo() const noexcept { return (type & (VAL_SQUO)) != 0; } + C4_ALWAYS_INLINE bool is_key_dquo() const noexcept { return (type & (KEY_DQUO)) != 0; } + C4_ALWAYS_INLINE bool is_val_dquo() const noexcept { return (type & (VAL_DQUO)) != 0; } + C4_ALWAYS_INLINE bool is_key_plain() const noexcept { return (type & 
(KEY_PLAIN)) != 0; } + C4_ALWAYS_INLINE bool is_val_plain() const noexcept { return (type & (VAL_PLAIN)) != 0; } + C4_ALWAYS_INLINE bool is_key_quoted() const noexcept { return (type & KEYQUO) != 0; } + C4_ALWAYS_INLINE bool is_val_quoted() const noexcept { return (type & VALQUO) != 0; } + C4_ALWAYS_INLINE bool is_quoted() const noexcept { return (type & (KEYQUO|VALQUO)) != 0; } + + C4_ALWAYS_INLINE void set_container_style(NodeType_e style) noexcept { type = ((style & CONTAINER_STYLE) | (type & ~CONTAINER_STYLE)); } + C4_ALWAYS_INLINE void set_key_style(NodeType_e style) noexcept { type = ((style & KEY_STYLE) | (type & ~KEY_STYLE)); } + C4_ALWAYS_INLINE void set_val_style(NodeType_e style) noexcept { type = ((style & VAL_STYLE) | (type & ~VAL_STYLE)); } + + /** @} */ }; @@ -19108,572 +21085,930 @@ struct NodeType //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- -/** a node scalar is a csubstr, which may be tagged and anchored. */ -struct NodeScalar -{ - csubstr tag; - csubstr scalar; - csubstr anchor; +/** @name scalar style helpers + * @{ */ -public: +/** choose a YAML emitting style based on the scalar's contents */ +RYML_EXPORT NodeType_e scalar_style_choose(csubstr scalar) noexcept; - /// initialize as an empty scalar - inline NodeScalar() noexcept : tag(), scalar(), anchor() {} +/** choose a json style based on the scalar's contents */ +RYML_EXPORT NodeType_e scalar_style_json_choose(csubstr scalar) noexcept; - /// initialize as an untagged scalar - template - inline NodeScalar(const char (&s)[N]) noexcept : tag(), scalar(s), anchor() {} - inline NodeScalar(csubstr s ) noexcept : tag(), scalar(s), anchor() {} +/** query whether a scalar can be encoded using single quotes. + * It may not be possible, notably when there is leading + * whitespace after a newline. 
*/ +RYML_EXPORT bool scalar_style_query_squo(csubstr s) noexcept; - /// initialize as a tagged scalar - template - inline NodeScalar(const char (&t)[N], const char (&s)[N]) noexcept : tag(t), scalar(s), anchor() {} - inline NodeScalar(csubstr t , csubstr s ) noexcept : tag(t), scalar(s), anchor() {} +/** query whether a scalar can be encoded using plain style (no + * quotes, not a literal/folded block scalar). */ +RYML_EXPORT bool scalar_style_query_plain(csubstr s) noexcept; -public: +/** YAML-sense query of nullity. returns true if the scalar points + * to `nullptr` or is otherwise equal to one of the strings + * `"~"`,`"null"`,`"Null"`,`"NULL"` */ +RYML_EXPORT inline C4_NO_INLINE bool scalar_is_null(csubstr s) noexcept +{ + return s.str == nullptr || + s == "~" || + s == "null" || + s == "Null" || + s == "NULL"; +} - ~NodeScalar() noexcept = default; - NodeScalar(NodeScalar &&) noexcept = default; - NodeScalar(NodeScalar const&) noexcept = default; - NodeScalar& operator= (NodeScalar &&) noexcept = default; - NodeScalar& operator= (NodeScalar const&) noexcept = default; +/** @} */ -public: - bool empty() const noexcept { return tag.empty() && scalar.empty() && anchor.empty(); } +/** @} */ - void clear() noexcept { tag.clear(); scalar.clear(); anchor.clear(); } +} // namespace yml +} // namespace c4 - void set_ref_maybe_replacing_scalar(csubstr ref, bool has_scalar) noexcept - { - csubstr trimmed = ref.begins_with('*') ? 
ref.sub(1) : ref; - anchor = trimmed; - if((!has_scalar) || !scalar.ends_with(trimmed)) - scalar = ref; - } -}; -C4_MUST_BE_TRIVIAL_COPY(NodeScalar); +C4_SUPPRESS_WARNING_MSVC_POP +C4_SUPPRESS_WARNING_GCC_CLANG_POP +#endif /* C4_YML_NODE_TYPE_HPP_ */ -//----------------------------------------------------------------------------- -//----------------------------------------------------------------------------- -//----------------------------------------------------------------------------- -/** convenience class to initialize nodes */ -struct NodeInit -{ - - NodeType type; - NodeScalar key; - NodeScalar val; - -public: +// (end https://github.com/biojppm/rapidyaml/src/c4/yml/node_type.hpp) - /// initialize as an empty node - NodeInit() : type(NOTYPE), key(), val() {} - /// initialize as a typed node - NodeInit(NodeType_e t) : type(t), key(), val() {} - /// initialize as a sequence member - NodeInit(NodeScalar const& v) : type(VAL), key(), val(v) { _add_flags(); } - /// initialize as a mapping member - NodeInit( NodeScalar const& k, NodeScalar const& v) : type(KEYVAL), key(k.tag, k.scalar), val(v.tag, v.scalar) { _add_flags(); } - /// initialize as a mapping member with explicit type - NodeInit(NodeType_e t, NodeScalar const& k, NodeScalar const& v) : type(t ), key(k.tag, k.scalar), val(v.tag, v.scalar) { _add_flags(); } - /// initialize as a mapping member with explicit type (eg SEQ or MAP) - NodeInit(NodeType_e t, NodeScalar const& k ) : type(t ), key(k.tag, k.scalar), val( ) { _add_flags(KEY); } -public: - void clear() - { - type.clear(); - key.clear(); - val.clear(); - } +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/yml/tag.hpp +// https://github.com/biojppm/rapidyaml/src/c4/yml/tag.hpp +//-------------------------------------------------------------------------------- 
+//******************************************************************************** - void _add_flags(type_bits more_flags=0) - { - type = (type|more_flags); - if( ! key.tag.empty()) - type = (type|KEYTAG); - if( ! val.tag.empty()) - type = (type|VALTAG); - if( ! key.anchor.empty()) - type = (type|KEYANCH); - if( ! val.anchor.empty()) - type = (type|VALANCH); - } +#ifndef _C4_YML_TAG_HPP_ +#define _C4_YML_TAG_HPP_ - bool _check() const - { - // key cannot be empty - RYML_ASSERT(key.scalar.empty() == ((type & KEY) == 0)); - // key tag cannot be empty - RYML_ASSERT(key.tag.empty() == ((type & KEYTAG) == 0)); - // val may be empty even though VAL is set. But when VAL is not set, val must be empty - RYML_ASSERT(((type & VAL) != 0) || val.scalar.empty()); - // val tag cannot be empty - RYML_ASSERT(val.tag.empty() == ((type & VALTAG) == 0)); - return true; - } -}; +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/common.hpp +//#include +#if !defined(C4_YML_COMMON_HPP_) && !defined(_C4_YML_COMMON_HPP_) +#error "amalgamate: file c4/yml/common.hpp must have been included at this point" +#endif /* C4_YML_COMMON_HPP_ */ -//----------------------------------------------------------------------------- -//----------------------------------------------------------------------------- -//----------------------------------------------------------------------------- +namespace c4 { +namespace yml { -/** contains the data for each YAML node. 
*/ -struct NodeData -{ - NodeType m_type; +class Tree; - NodeScalar m_key; - NodeScalar m_val; +/** @addtogroup doc_tag_utils + * + * @{ + */ - size_t m_parent; - size_t m_first_child; - size_t m_last_child; - size_t m_next_sibling; - size_t m_prev_sibling; -}; -C4_MUST_BE_TRIVIAL_COPY(NodeData); +#ifndef RYML_MAX_TAG_DIRECTIVES +/** the maximum number of tag directives in a Tree */ +#define RYML_MAX_TAG_DIRECTIVES 4 +#endif -//----------------------------------------------------------------------------- -//----------------------------------------------------------------------------- -//----------------------------------------------------------------------------- +/** the integral type necessary to cover all the bits marking node tags */ +using tag_bits = uint16_t; -class RYML_EXPORT Tree -{ -public: +/** a bit mask for marking tags for types */ +typedef enum : tag_bits { + TAG_NONE = 0, + // container types + TAG_MAP = 1, /**< !!map Unordered set of key: value pairs without duplicates. @see https://yaml.org/type/map.html */ + TAG_OMAP = 2, /**< !!omap Ordered sequence of key: value pairs without duplicates. @see https://yaml.org/type/omap.html */ + TAG_PAIRS = 3, /**< !!pairs Ordered sequence of key: value pairs allowing duplicates. @see https://yaml.org/type/pairs.html */ + TAG_SET = 4, /**< !!set Unordered set of non-equal values. @see https://yaml.org/type/set.html */ + TAG_SEQ = 5, /**< !!seq Sequence of arbitrary values. @see https://yaml.org/type/seq.html */ + // scalar types + TAG_BINARY = 6, /**< !!binary A sequence of zero or more octets (8 bit values). @see https://yaml.org/type/binary.html */ + TAG_BOOL = 7, /**< !!bool Mathematical Booleans. @see https://yaml.org/type/bool.html */ + TAG_FLOAT = 8, /**< !!float Floating-point approximation to real numbers. https://yaml.org/type/float.html */ + TAG_INT = 9, /**< !!int Mathematical integers.
https://yaml.org/type/int.html */ + TAG_MERGE = 10, /**< !!merge Specify one or more mappings to be merged with the current one. https://yaml.org/type/merge.html */ + TAG_NULL = 11, /**< !!null Devoid of value. https://yaml.org/type/null.html */ + TAG_STR = 12, /**< !!str A sequence of zero or more Unicode characters. https://yaml.org/type/str.html */ + TAG_TIMESTAMP = 13, /**< !!timestamp A point in time https://yaml.org/type/timestamp.html */ + TAG_VALUE = 14, /**< !!value Specify the default value of a mapping https://yaml.org/type/value.html */ + TAG_YAML = 15, /**< !!yaml Keys for encoding YAML in YAML https://yaml.org/type/yaml.html */ +} YamlTag_e; - /** @name construction and assignment */ - /** @{ */ +RYML_EXPORT YamlTag_e to_tag(csubstr tag); +RYML_EXPORT csubstr from_tag(YamlTag_e tag); +RYML_EXPORT csubstr from_tag_long(YamlTag_e tag); +RYML_EXPORT csubstr normalize_tag(csubstr tag); +RYML_EXPORT csubstr normalize_tag_long(csubstr tag); +RYML_EXPORT csubstr normalize_tag_long(csubstr tag, substr output); - Tree() : Tree(get_callbacks()) {} - Tree(Callbacks const& cb); - Tree(size_t node_capacity, size_t arena_capacity=0) : Tree(node_capacity, arena_capacity, get_callbacks()) {} - Tree(size_t node_capacity, size_t arena_capacity, Callbacks const& cb); +RYML_EXPORT bool is_custom_tag(csubstr tag); - ~Tree(); - Tree(Tree const& that) noexcept; - Tree(Tree && that) noexcept; +struct RYML_EXPORT TagDirective +{ + /** Eg `!e!` in `%TAG !e! tag:example.com,2000:app/` */ + csubstr handle; + /** Eg `tag:example.com,2000:app/` in `%TAG !e!
tag:example.com,2000:app/` */ + csubstr prefix; + /** The next node to which this tag directive applies */ + id_type next_node_id; - Tree& operator= (Tree const& that) noexcept; - Tree& operator= (Tree && that) noexcept; + bool create_from_str(csubstr directive_); ///< leaves next_node_id unfilled + bool create_from_str(csubstr directive_, Tree *tree); + size_t transform(csubstr tag, substr output, Callbacks const& callbacks) const; +}; - /** @} */ +struct RYML_EXPORT TagDirectiveRange +{ + TagDirective const* C4_RESTRICT b; + TagDirective const* C4_RESTRICT e; + C4_ALWAYS_INLINE TagDirective const* begin() const noexcept { return b; } + C4_ALWAYS_INLINE TagDirective const* end() const noexcept { return e; } +}; -public: +/** @} */ - /** @name memory and sizing */ - /** @{ */ +} // namespace yml +} // namespace c4 - void reserve(size_t node_capacity); +#endif /* _C4_YML_TAG_HPP_ */ - /** clear the tree and zero every node - * @note does NOT clear the arena - * @see clear_arena() */ - void clear(); - inline void clear_arena() { m_arena_pos = 0; } - inline bool empty() const { return m_size == 0; } +// (end https://github.com/biojppm/rapidyaml/src/c4/yml/tag.hpp) - inline size_t size() const { return m_size; } - inline size_t capacity() const { return m_cap; } - inline size_t slack() const { RYML_ASSERT(m_cap >= m_size); return m_cap - m_size; } - Callbacks const& callbacks() const { return m_callbacks; } - void callbacks(Callbacks const& cb) { m_callbacks = cb; } - /** @} */ +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/yml/tree.hpp +// https://github.com/biojppm/rapidyaml/src/c4/yml/tree.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** -public: +#ifndef _C4_YML_TREE_HPP_ +#define _C4_YML_TREE_HPP_ - /** @name node 
getters */ - /** @{ */ +/** @file tree.hpp */ - //! get the index of a node belonging to this tree. - //! @p n can be nullptr, in which case a - size_t id(NodeData const* n) const - { - if( ! n) - { - return NONE; - } - RYML_ASSERT(n >= m_buf && n < m_buf + m_cap); - return static_cast(n - m_buf); - } +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/error.hpp +//#include "c4/error.hpp" +#if !defined(C4_ERROR_HPP_) && !defined(_C4_ERROR_HPP_) +#error "amalgamate: file c4/error.hpp must have been included at this point" +#endif /* C4_ERROR_HPP_ */ - //! get a pointer to a node's NodeData. - //! i can be NONE, in which case a nullptr is returned - inline NodeData *get(size_t i) - { - if(i == NONE) - return nullptr; - RYML_ASSERT(i >= 0 && i < m_cap); - return m_buf + i; - } - //! get a pointer to a node's NodeData. - //! i can be NONE, in which case a nullptr is returned. - inline NodeData const *get(size_t i) const - { - if(i == NONE) - return nullptr; - RYML_ASSERT(i >= 0 && i < m_cap); - return m_buf + i; - } +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/types.hpp +//#include "c4/types.hpp" +#if !defined(C4_TYPES_HPP_) && !defined(_C4_TYPES_HPP_) +#error "amalgamate: file c4/types.hpp must have been included at this point" +#endif /* C4_TYPES_HPP_ */ - //! An if-less form of get() that demands a valid node index. - //! This function is implementation only; use at your own risk. - inline NodeData * _p(size_t i) { RYML_ASSERT(i != NONE && i >= 0 && i < m_cap); return m_buf + i; } - //! An if-less form of get() that demands a valid node index. - //! This function is implementation only; use at your own risk. 
- inline NodeData const * _p(size_t i) const { RYML_ASSERT(i != NONE && i >= 0 && i < m_cap); return m_buf + i; } +#ifndef _C4_YML_FWD_HPP_ +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/fwd.hpp +//#include "c4/yml/fwd.hpp" +#if !defined(C4_YML_FWD_HPP_) && !defined(_C4_YML_FWD_HPP_) +#error "amalgamate: file c4/yml/fwd.hpp must have been included at this point" +#endif /* C4_YML_FWD_HPP_ */ - //! Get the id of the root node - size_t root_id() { if(m_cap == 0) { reserve(16); } RYML_ASSERT(m_cap > 0 && m_size > 0); return 0; } - //! Get the id of the root node - size_t root_id() const { RYML_ASSERT(m_cap > 0 && m_size > 0); return 0; } +#endif +#ifndef _C4_YML_COMMON_HPP_ +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/common.hpp +//#include "c4/yml/common.hpp" +#if !defined(C4_YML_COMMON_HPP_) && !defined(_C4_YML_COMMON_HPP_) +#error "amalgamate: file c4/yml/common.hpp must have been included at this point" +#endif /* C4_YML_COMMON_HPP_ */ - //! Get a NodeRef of a node by id - NodeRef ref(size_t id); - //! Get a NodeRef of a node by id - ConstNodeRef ref(size_t id) const; - //! Get a NodeRef of a node by id - ConstNodeRef cref(size_t id); - //! Get a NodeRef of a node by id - ConstNodeRef cref(size_t id) const; +#endif +#ifndef C4_YML_NODE_TYPE_HPP_ +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/node_type.hpp +//#include "c4/yml/node_type.hpp" +#if !defined(C4_YML_NODE_TYPE_HPP_) && !defined(_C4_YML_NODE_TYPE_HPP_) +#error "amalgamate: file c4/yml/node_type.hpp must have been included at this point" +#endif /* C4_YML_NODE_TYPE_HPP_ */ - //! Get the root as a NodeRef - NodeRef rootref(); - //! Get the root as a NodeRef - ConstNodeRef rootref() const; - //! Get the root as a NodeRef - ConstNodeRef crootref(); - //! 
Get the root as a NodeRef - ConstNodeRef crootref() const; +#endif +#ifndef _C4_YML_TAG_HPP_ +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/tag.hpp +//#include "c4/yml/tag.hpp" +#if !defined(C4_YML_TAG_HPP_) && !defined(_C4_YML_TAG_HPP_) +#error "amalgamate: file c4/yml/tag.hpp must have been included at this point" +#endif /* C4_YML_TAG_HPP_ */ - //! find a root child by name, return it as a NodeRef - //! @note requires the root to be a map. - NodeRef operator[] (csubstr key); - //! find a root child by name, return it as a NodeRef - //! @note requires the root to be a map. - ConstNodeRef operator[] (csubstr key) const; +#endif +#ifndef _C4_CHARCONV_HPP_ +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/charconv.hpp +//#include +#if !defined(C4_CHARCONV_HPP_) && !defined(_C4_CHARCONV_HPP_) +#error "amalgamate: file c4/charconv.hpp must have been included at this point" +#endif /* C4_CHARCONV_HPP_ */ - //! find a root child by index: return the root node's @p i-th child as a NodeRef - //! @note @i is NOT the node id, but the child's position - NodeRef operator[] (size_t i); - //! find a root child by index: return the root node's @p i-th child as a NodeRef - //! @note @i is NOT the node id, but the child's position - ConstNodeRef operator[] (size_t i) const; +#endif - //! get the i-th document of the stream - //! @note @i is NOT the node id, but the doc position within the stream - NodeRef docref(size_t i); - //! get the i-th document of the stream - //! 
@note @i is NOT the node id, but the doc position within the stream - ConstNodeRef docref(size_t i) const; +//included above: +//#include +//included above: +//#include - /** @} */ -public: +C4_SUPPRESS_WARNING_MSVC_PUSH +C4_SUPPRESS_WARNING_MSVC(4251) // needs to have dll-interface to be used by clients of struct +C4_SUPPRESS_WARNING_MSVC(4296) // expression is always 'boolean_value' +C4_SUPPRESS_WARNING_GCC_CLANG_PUSH +C4_SUPPRESS_WARNING_GCC_CLANG("-Wold-style-cast") +C4_SUPPRESS_WARNING_GCC("-Wuseless-cast") +C4_SUPPRESS_WARNING_GCC("-Wtype-limits") - /** @name node property getters */ - /** @{ */ - NodeType type(size_t node) const { return _p(node)->m_type; } - const char* type_str(size_t node) const { return NodeType::type_str(_p(node)->m_type); } +namespace c4 { +namespace yml { - csubstr const& key (size_t node) const { RYML_ASSERT(has_key(node)); return _p(node)->m_key.scalar; } - csubstr const& key_tag (size_t node) const { RYML_ASSERT(has_key_tag(node)); return _p(node)->m_key.tag; } - csubstr const& key_ref (size_t node) const { RYML_ASSERT(is_key_ref(node) && ! has_key_anchor(node)); return _p(node)->m_key.anchor; } - csubstr const& key_anchor(size_t node) const { RYML_ASSERT( ! is_key_ref(node) && has_key_anchor(node)); return _p(node)->m_key.anchor; } - NodeScalar const& keysc (size_t node) const { RYML_ASSERT(has_key(node)); return _p(node)->m_key; } +/** encode a floating point value to a string. 
*/ +template +size_t to_chars_float(substr buf, T val) +{ + C4_SUPPRESS_WARNING_GCC_CLANG_WITH_PUSH("-Wfloat-equal"); + static_assert(std::is_floating_point::value, "must be floating point"); + if(C4_UNLIKELY(std::isnan(val))) + return to_chars(buf, csubstr(".nan")); + else if(C4_UNLIKELY(val == std::numeric_limits::infinity())) + return to_chars(buf, csubstr(".inf")); + else if(C4_UNLIKELY(val == -std::numeric_limits::infinity())) + return to_chars(buf, csubstr("-.inf")); + return to_chars(buf, val); + C4_SUPPRESS_WARNING_GCC_CLANG_POP +} - csubstr const& val (size_t node) const { RYML_ASSERT(has_val(node)); return _p(node)->m_val.scalar; } - csubstr const& val_tag (size_t node) const { RYML_ASSERT(has_val_tag(node)); return _p(node)->m_val.tag; } - csubstr const& val_ref (size_t node) const { RYML_ASSERT(is_val_ref(node) && ! has_val_anchor(node)); return _p(node)->m_val.anchor; } - csubstr const& val_anchor(size_t node) const { RYML_ASSERT( ! is_val_ref(node) && has_val_anchor(node)); return _p(node)->m_val.anchor; } - NodeScalar const& valsc (size_t node) const { RYML_ASSERT(has_val(node)); return _p(node)->m_val; } - /** @} */ +/** decode a floating point from string. 
Accepts special values: .nan, + * .inf, -.inf */ +template +bool from_chars_float(csubstr buf, T *C4_RESTRICT val) +{ + static_assert(std::is_floating_point::value, "must be floating point"); + if(C4_LIKELY(from_chars(buf, val))) + { + return true; + } + else if(C4_UNLIKELY(buf.begins_with('+'))) + { + return from_chars(buf.sub(1), val); + } + else if(C4_UNLIKELY(buf == ".nan" || buf == ".NaN" || buf == ".NAN")) + { + *val = std::numeric_limits::quiet_NaN(); + return true; + } + else if(C4_UNLIKELY(buf == ".inf" || buf == ".Inf" || buf == ".INF")) + { + *val = std::numeric_limits::infinity(); + return true; + } + else if(C4_UNLIKELY(buf == "-.inf" || buf == "-.Inf" || buf == "-.INF")) + { + *val = -std::numeric_limits::infinity(); + return true; + } + else + { + return false; + } +} -public: - /** @name node predicates */ - /** @{ */ +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- - C4_ALWAYS_INLINE bool is_stream(size_t node) const { return _p(node)->m_type.is_stream(); } - C4_ALWAYS_INLINE bool is_doc(size_t node) const { return _p(node)->m_type.is_doc(); } - C4_ALWAYS_INLINE bool is_container(size_t node) const { return _p(node)->m_type.is_container(); } - C4_ALWAYS_INLINE bool is_map(size_t node) const { return _p(node)->m_type.is_map(); } - C4_ALWAYS_INLINE bool is_seq(size_t node) const { return _p(node)->m_type.is_seq(); } - C4_ALWAYS_INLINE bool has_key(size_t node) const { return _p(node)->m_type.has_key(); } - C4_ALWAYS_INLINE bool has_val(size_t node) const { return _p(node)->m_type.has_val(); } - C4_ALWAYS_INLINE bool is_val(size_t node) const { return _p(node)->m_type.is_val(); } - C4_ALWAYS_INLINE bool is_keyval(size_t node) const { return _p(node)->m_type.is_keyval(); } - C4_ALWAYS_INLINE bool has_key_tag(size_t node) const { return 
_p(node)->m_type.has_key_tag(); } - C4_ALWAYS_INLINE bool has_val_tag(size_t node) const { return _p(node)->m_type.has_val_tag(); } - C4_ALWAYS_INLINE bool has_key_anchor(size_t node) const { return _p(node)->m_type.has_key_anchor(); } - C4_ALWAYS_INLINE bool is_key_anchor(size_t node) const { return _p(node)->m_type.is_key_anchor(); } - C4_ALWAYS_INLINE bool has_val_anchor(size_t node) const { return _p(node)->m_type.has_val_anchor(); } - C4_ALWAYS_INLINE bool is_val_anchor(size_t node) const { return _p(node)->m_type.is_val_anchor(); } - C4_ALWAYS_INLINE bool has_anchor(size_t node) const { return _p(node)->m_type.has_anchor(); } - C4_ALWAYS_INLINE bool is_anchor(size_t node) const { return _p(node)->m_type.is_anchor(); } - C4_ALWAYS_INLINE bool is_key_ref(size_t node) const { return _p(node)->m_type.is_key_ref(); } - C4_ALWAYS_INLINE bool is_val_ref(size_t node) const { return _p(node)->m_type.is_val_ref(); } - C4_ALWAYS_INLINE bool is_ref(size_t node) const { return _p(node)->m_type.is_ref(); } - C4_ALWAYS_INLINE bool is_anchor_or_ref(size_t node) const { return _p(node)->m_type.is_anchor_or_ref(); } - C4_ALWAYS_INLINE bool is_key_quoted(size_t node) const { return _p(node)->m_type.is_key_quoted(); } - C4_ALWAYS_INLINE bool is_val_quoted(size_t node) const { return _p(node)->m_type.is_val_quoted(); } - C4_ALWAYS_INLINE bool is_quoted(size_t node) const { return _p(node)->m_type.is_quoted(); } - - C4_ALWAYS_INLINE bool parent_is_seq(size_t node) const { RYML_ASSERT(has_parent(node)); return is_seq(_p(node)->m_parent); } - C4_ALWAYS_INLINE bool parent_is_map(size_t node) const { RYML_ASSERT(has_parent(node)); return is_map(_p(node)->m_parent); } - /** true when key and val are empty, and has no children */ - C4_ALWAYS_INLINE bool empty(size_t node) const { return ! has_children(node) && _p(node)->m_key.empty() && (( ! 
(_p(node)->m_type & VAL)) || _p(node)->m_val.empty()); } - /** true when the node has an anchor named a */ - C4_ALWAYS_INLINE bool has_anchor(size_t node, csubstr a) const { return _p(node)->m_key.anchor == a || _p(node)->m_val.anchor == a; } +/** @addtogroup doc_tree + * + * @{ + */ - C4_ALWAYS_INLINE bool key_is_null(size_t node) const { RYML_ASSERT(has_key(node)); NodeData const* C4_RESTRICT n = _p(node); return !n->m_type.is_key_quoted() && _is_null(n->m_key.scalar); } - C4_ALWAYS_INLINE bool val_is_null(size_t node) const { RYML_ASSERT(has_val(node)); NodeData const* C4_RESTRICT n = _p(node); return !n->m_type.is_val_quoted() && _is_null(n->m_val.scalar); } - static bool _is_null(csubstr s) noexcept - { - return s.str == nullptr || - s == "~" || - s == "null" || - s == "Null" || - s == "NULL"; - } +/** a node scalar is a csubstr, which may be tagged and anchored. */ +struct NodeScalar +{ + csubstr tag; + csubstr scalar; + csubstr anchor; - /** @} */ +public: + + /// initialize as an empty scalar + inline NodeScalar() noexcept : tag(), scalar(), anchor() {} + + /// initialize as an untagged scalar + template + inline NodeScalar(const char (&s)[N]) noexcept : tag(), scalar(s), anchor() {} + inline NodeScalar(csubstr s ) noexcept : tag(), scalar(s), anchor() {} + + /// initialize as a tagged scalar + template + inline NodeScalar(const char (&t)[N], const char (&s)[N]) noexcept : tag(t), scalar(s), anchor() {} + inline NodeScalar(csubstr t , csubstr s ) noexcept : tag(t), scalar(s), anchor() {} public: - /** @name hierarchy predicates */ - /** @{ */ + ~NodeScalar() noexcept = default; + NodeScalar(NodeScalar &&) noexcept = default; + NodeScalar(NodeScalar const&) noexcept = default; + NodeScalar& operator= (NodeScalar &&) noexcept = default; + NodeScalar& operator= (NodeScalar const&) noexcept = default; - bool is_root(size_t node) const { RYML_ASSERT(_p(node)->m_parent != NONE || node == 0); return _p(node)->m_parent == NONE; } +public: - bool has_parent(size_t 
node) const { return _p(node)->m_parent != NONE; } + bool empty() const noexcept { return tag.empty() && scalar.empty() && anchor.empty(); } - /** true if @p node has a child with id @p ch */ - bool has_child(size_t node, size_t ch) const { return _p(ch)->m_parent == node; } - /** true if @p node has a child with key @p key */ - bool has_child(size_t node, csubstr key) const { return find_child(node, key) != npos; } - /** true if @p node has any children key */ - bool has_children(size_t node) const { return _p(node)->m_first_child != NONE; } + void clear() noexcept { tag.clear(); scalar.clear(); anchor.clear(); } - /** true if @p node has a sibling with id @p sib */ - bool has_sibling(size_t node, size_t sib) const { return _p(node)->m_parent == _p(sib)->m_parent; } - /** true if one of the node's siblings has the given key */ - bool has_sibling(size_t node, csubstr key) const { return find_sibling(node, key) != npos; } - /** true if node is not a single child */ - bool has_other_siblings(size_t node) const + void set_ref_maybe_replacing_scalar(csubstr ref, bool has_scalar) RYML_NOEXCEPT { - NodeData const *n = _p(node); - if(C4_LIKELY(n->m_parent != NONE)) - { - n = _p(n->m_parent); - return n->m_first_child != n->m_last_child; - } - return false; + csubstr trimmed = ref.begins_with('*') ? 
ref.sub(1) : ref; + anchor = trimmed; + if((!has_scalar) || !scalar.ends_with(trimmed)) + scalar = ref; } +}; +C4_MUST_BE_TRIVIAL_COPY(NodeScalar); - RYML_DEPRECATED("use has_other_siblings()") bool has_siblings(size_t /*node*/) const { return true; } - - /** @} */ -public: +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- - /** @name hierarchy getters */ - /** @{ */ +/** convenience class to initialize nodes */ +struct NodeInit +{ - size_t parent(size_t node) const { return _p(node)->m_parent; } + NodeType type; + NodeScalar key; + NodeScalar val; - size_t prev_sibling(size_t node) const { return _p(node)->m_prev_sibling; } - size_t next_sibling(size_t node) const { return _p(node)->m_next_sibling; } +public: - /** O(#num_children) */ - size_t num_children(size_t node) const; - size_t child_pos(size_t node, size_t ch) const; - size_t first_child(size_t node) const { return _p(node)->m_first_child; } - size_t last_child(size_t node) const { return _p(node)->m_last_child; } - size_t child(size_t node, size_t pos) const; - size_t find_child(size_t node, csubstr const& key) const; + /// initialize as an empty node + NodeInit() : type(NOTYPE), key(), val() {} + /// initialize as a typed node + NodeInit(NodeType_e t) : type(t), key(), val() {} + /// initialize as a sequence member + NodeInit(NodeScalar const& v) : type(VAL), key(), val(v) { _add_flags(); } + /// initialize as a sequence member with explicit type + NodeInit(NodeScalar const& v, NodeType_e t) : type(t|VAL), key(), val(v) { _add_flags(); } + /// initialize as a mapping member + NodeInit( NodeScalar const& k, NodeScalar const& v) : type(KEYVAL), key(k), val(v) { _add_flags(); } + /// initialize as a mapping member with explicit type + NodeInit(NodeType_e t, NodeScalar const& k, NodeScalar const& v) : type(t), key(k), 
val(v) { _add_flags(); } + /// initialize as a mapping member with explicit type (eg for SEQ or MAP) + NodeInit(NodeType_e t, NodeScalar const& k ) : type(t), key(k), val( ) { _add_flags(KEY); } - /** O(#num_siblings) */ - /** counts with this */ - size_t num_siblings(size_t node) const { return is_root(node) ? 1 : num_children(_p(node)->m_parent); } - /** does not count with this */ - size_t num_other_siblings(size_t node) const { size_t ns = num_siblings(node); RYML_ASSERT(ns > 0); return ns-1; } - size_t sibling_pos(size_t node, size_t sib) const { RYML_ASSERT( ! is_root(node) || node == root_id()); return child_pos(_p(node)->m_parent, sib); } - size_t first_sibling(size_t node) const { return is_root(node) ? node : _p(_p(node)->m_parent)->m_first_child; } - size_t last_sibling(size_t node) const { return is_root(node) ? node : _p(_p(node)->m_parent)->m_last_child; } - size_t sibling(size_t node, size_t pos) const { return child(_p(node)->m_parent, pos); } - size_t find_sibling(size_t node, csubstr const& key) const { return find_child(_p(node)->m_parent, key); } +public: - size_t doc(size_t i) const { size_t rid = root_id(); RYML_ASSERT(is_stream(rid)); return child(rid, i); } //!< gets the @p i document node index. requires that the root node is a stream. + void clear() + { + type.clear(); + key.clear(); + val.clear(); + } - /** @} */ + void _add_flags(type_bits more_flags=0) + { + type = (type|more_flags); + if( ! key.tag.empty()) + type = (type|KEYTAG); + if( ! val.tag.empty()) + type = (type|VALTAG); + if( ! key.anchor.empty()) + type = (type|KEYANCH); + if( ! val.anchor.empty()) + type = (type|VALANCH); + } -public: + bool _check() const + { + // key cannot be empty + RYML_ASSERT(key.scalar.empty() == ((type & KEY) == 0)); + // key tag cannot be empty + RYML_ASSERT(key.tag.empty() == ((type & KEYTAG) == 0)); + // val may be empty even though VAL is set. 
But when VAL is not set, val must be empty + RYML_ASSERT(((type & VAL) != 0) || val.scalar.empty()); + // val tag cannot be empty + RYML_ASSERT(val.tag.empty() == ((type & VALTAG) == 0)); + return true; + } +}; - /** @name node modifiers */ - /** @{ */ - void to_keyval(size_t node, csubstr key, csubstr val, type_bits more_flags=0); - void to_map(size_t node, csubstr key, type_bits more_flags=0); - void to_seq(size_t node, csubstr key, type_bits more_flags=0); - void to_val(size_t node, csubstr val, type_bits more_flags=0); - void to_map(size_t node, type_bits more_flags=0); - void to_seq(size_t node, type_bits more_flags=0); - void to_doc(size_t node, type_bits more_flags=0); - void to_stream(size_t node, type_bits more_flags=0); +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- - void set_key(size_t node, csubstr key) { RYML_ASSERT(has_key(node)); _p(node)->m_key.scalar = key; } - void set_val(size_t node, csubstr val) { RYML_ASSERT(has_val(node)); _p(node)->m_val.scalar = val; } +/** contains the data for each YAML node. */ +struct NodeData +{ + NodeType m_type; - void set_key_tag(size_t node, csubstr tag) { RYML_ASSERT(has_key(node)); _p(node)->m_key.tag = tag; _add_flags(node, KEYTAG); } - void set_val_tag(size_t node, csubstr tag) { RYML_ASSERT(has_val(node) || is_container(node)); _p(node)->m_val.tag = tag; _add_flags(node, VALTAG); } + NodeScalar m_key; + NodeScalar m_val; - void set_key_anchor(size_t node, csubstr anchor) { RYML_ASSERT( ! is_key_ref(node)); _p(node)->m_key.anchor = anchor.triml('&'); _add_flags(node, KEYANCH); } - void set_val_anchor(size_t node, csubstr anchor) { RYML_ASSERT( ! is_val_ref(node)); _p(node)->m_val.anchor = anchor.triml('&'); _add_flags(node, VALANCH); } - void set_key_ref (size_t node, csubstr ref ) { RYML_ASSERT( ! 
has_key_anchor(node)); NodeData* C4_RESTRICT n = _p(node); n->m_key.set_ref_maybe_replacing_scalar(ref, n->m_type.has_key()); _add_flags(node, KEY|KEYREF); } - void set_val_ref (size_t node, csubstr ref ) { RYML_ASSERT( ! has_val_anchor(node)); NodeData* C4_RESTRICT n = _p(node); n->m_val.set_ref_maybe_replacing_scalar(ref, n->m_type.has_val()); _add_flags(node, VAL|VALREF); } + id_type m_parent; + id_type m_first_child; + id_type m_last_child; + id_type m_next_sibling; + id_type m_prev_sibling; +}; +C4_MUST_BE_TRIVIAL_COPY(NodeData); - void rem_key_anchor(size_t node) { _p(node)->m_key.anchor.clear(); _rem_flags(node, KEYANCH); } - void rem_val_anchor(size_t node) { _p(node)->m_val.anchor.clear(); _rem_flags(node, VALANCH); } - void rem_key_ref (size_t node) { _p(node)->m_key.anchor.clear(); _rem_flags(node, KEYREF); } - void rem_val_ref (size_t node) { _p(node)->m_val.anchor.clear(); _rem_flags(node, VALREF); } - void rem_anchor_ref(size_t node) { _p(node)->m_key.anchor.clear(); _p(node)->m_val.anchor.clear(); _rem_flags(node, KEYANCH|VALANCH|KEYREF|VALREF); } - /** @} */ +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +class RYML_EXPORT Tree +{ public: - /** @name tree modifiers */ + /** @name construction and assignment */ /** @{ */ - /** reorder the tree in memory so that all the nodes are stored - * in a linear sequence when visited in depth-first order. - * This will invalidate existing ids, since the node id is its - * position in the node array. 
*/ - void reorder(); + Tree() : Tree(get_callbacks()) {} + Tree(Callbacks const& cb); + Tree(id_type node_capacity, size_t arena_capacity=0) : Tree(node_capacity, arena_capacity, get_callbacks()) {} + Tree(id_type node_capacity, size_t arena_capacity, Callbacks const& cb); - /** Resolve references (aliases <- anchors) in the tree. - * - * Dereferencing is opt-in; after parsing, Tree::resolve() - * has to be called explicitly for obtaining resolved references in the - * tree. This method will resolve all references and substitute the - * anchored values in place of the reference. - * - * This method first does a full traversal of the tree to gather all - * anchors and references in a separate collection, then it goes through - * that collection to locate the names, which it does by obeying the YAML - * standard diktat that "an alias node refers to the most recent node in - * the serialization having the specified anchor" - * - * So, depending on the number of anchor/alias nodes, this is a - * potentially expensive operation, with a best-case linear complexity - * (from the initial traversal). This potential cost is the reason for - * requiring an explicit call. - */ - void resolve(); + ~Tree(); + + Tree(Tree const& that); + Tree(Tree && that) noexcept; + + Tree& operator= (Tree const& that); + Tree& operator= (Tree && that) RYML_NOEXCEPT; /** @} */ public: - /** @name tag directives */ + /** @name memory and sizing */ /** @{ */ - void resolve_tags(); - - size_t num_tag_directives() const; - size_t add_tag_directive(TagDirective const& td); - void clear_tag_directives(); + void reserve(id_type node_capacity); - size_t resolve_tag(substr output, csubstr tag, size_t node_id) const; - csubstr resolve_tag_sub(substr output, csubstr tag, size_t node_id) const - { - size_t needed = resolve_tag(output, tag, node_id); - return needed <= output.len ? 
output.first(needed) : output; - } + /** clear the tree and zero every node + * @note does NOT clear the arena + * @see clear_arena() */ + void clear(); + inline void clear_arena() { m_arena_pos = 0; } - using tag_directive_const_iterator = TagDirective const*; - tag_directive_const_iterator begin_tag_directives() const { return m_tag_directives; } - tag_directive_const_iterator end_tag_directives() const { return m_tag_directives + num_tag_directives(); } + inline bool empty() const { return m_size == 0; } - struct TagDirectiveProxy - { - tag_directive_const_iterator b, e; - tag_directive_const_iterator begin() const { return b; } - tag_directive_const_iterator end() const { return e; } - }; + inline id_type size() const { return m_size; } + inline id_type capacity() const { return m_cap; } + inline id_type slack() const { RYML_ASSERT(m_cap >= m_size); return m_cap - m_size; } - TagDirectiveProxy tag_directives() const { return TagDirectiveProxy{begin_tag_directives(), end_tag_directives()}; } + Callbacks const& callbacks() const { return m_callbacks; } + void callbacks(Callbacks const& cb) { m_callbacks = cb; } /** @} */ public: - /** @name modifying hierarchy */ + /** @name node getters */ /** @{ */ - /** create and insert a new child of @p parent. insert after the (to-be) - * sibling @p after, which must be a child of @p parent. To insert as the - * first child, set after to NONE */ - C4_ALWAYS_INLINE size_t insert_child(size_t parent, size_t after) + //! get the index of a node belonging to this tree. + //! @p n can be nullptr, in which case NONE is returned + id_type id(NodeData const* n) const { - RYML_ASSERT(parent != NONE); - RYML_ASSERT(is_container(parent) || is_root(parent)); - RYML_ASSERT(after == NONE || (_p(after)->m_parent == parent)); - size_t child = _claim(); - _set_hierarchy(child, parent, after); - return child; + if( ! 
n) + return NONE; + _RYML_CB_ASSERT(m_callbacks, n >= m_buf && n < m_buf + m_cap); + return static_cast(n - m_buf); } - /** create and insert a node as the first child of @p parent */ - C4_ALWAYS_INLINE size_t prepend_child(size_t parent) { return insert_child(parent, NONE); } - /** create and insert a node as the last child of @p parent */ - C4_ALWAYS_INLINE size_t append_child(size_t parent) { return insert_child(parent, _p(parent)->m_last_child); } - -public: - - #if defined(__clang__) - # pragma clang diagnostic push - # pragma clang diagnostic ignored "-Wnull-dereference" - #elif defined(__GNUC__) - # pragma GCC diagnostic push - # if __GNUC__ >= 6 - # pragma GCC diagnostic ignored "-Wnull-dereference" - # endif - #endif - //! create and insert a new sibling of n. insert after "after" - C4_ALWAYS_INLINE size_t insert_sibling(size_t node, size_t after) + //! get a pointer to a node's NodeData. + //! i can be NONE, in which case a nullptr is returned + inline NodeData *get(id_type node) { - return insert_child(_p(node)->m_parent, after); + if(node == NONE) + return nullptr; + _RYML_CB_ASSERT(m_callbacks, node >= 0 && node < m_cap); + return m_buf + node; } - /** create and insert a node as the first node of @p parent */ - C4_ALWAYS_INLINE size_t prepend_sibling(size_t node) { return prepend_child(_p(node)->m_parent); } - C4_ALWAYS_INLINE size_t append_sibling(size_t node) { return append_child(_p(node)->m_parent); } - -public: - - /** remove an entire branch at once: ie remove the children and the node itself */ - inline void remove(size_t node) + //! get a pointer to a node's NodeData. + //! i can be NONE, in which case a nullptr is returned. + inline NodeData const *get(id_type node) const + { + if(node == NONE) + return nullptr; + _RYML_CB_ASSERT(m_callbacks, node >= 0 && node < m_cap); + return m_buf + node; + } + + //! An if-less form of get() that demands a valid node index. + //! This function is implementation only; use at your own risk. 
+ inline NodeData * _p(id_type node) { _RYML_CB_ASSERT(m_callbacks, node != NONE && node >= 0 && node < m_cap); return m_buf + node; } + //! An if-less form of get() that demands a valid node index. + //! This function is implementation only; use at your own risk. + inline NodeData const * _p(id_type node) const { _RYML_CB_ASSERT(m_callbacks, node != NONE && node >= 0 && node < m_cap); return m_buf + node; } + + //! Get the id of the root node + id_type root_id() { if(m_cap == 0) { reserve(16); } _RYML_CB_ASSERT(m_callbacks, m_cap > 0 && m_size > 0); return 0; } + //! Get the id of the root node + id_type root_id() const { _RYML_CB_ASSERT(m_callbacks, m_cap > 0 && m_size > 0); return 0; } + + //! Get a NodeRef of a node by id + NodeRef ref(id_type node); + //! Get a NodeRef of a node by id + ConstNodeRef ref(id_type node) const; + //! Get a NodeRef of a node by id + ConstNodeRef cref(id_type node) const; + + //! Get the root as a NodeRef + NodeRef rootref(); + //! Get the root as a ConstNodeRef + ConstNodeRef rootref() const; + //! Get the root as a ConstNodeRef + ConstNodeRef crootref() const; + + //! get the i-th document of the stream + //! @note @p i is NOT the node id, but the doc position within the stream + NodeRef docref(id_type i); + //! get the i-th document of the stream + //! @note @p i is NOT the node id, but the doc position within the stream + ConstNodeRef docref(id_type i) const; + //! get the i-th document of the stream + //! @note @p i is NOT the node id, but the doc position within the stream + ConstNodeRef cdocref(id_type i) const; + + //! find a root child by name, return it as a NodeRef + //! @note requires the root to be a map. + NodeRef operator[] (csubstr key); + //! find a root child by name, return it as a NodeRef + //! @note requires the root to be a map. + ConstNodeRef operator[] (csubstr key) const; + + //! find a root child by index: return the root node's @p i-th child as a NodeRef + //! 
@note @p i is NOT the node id, but the child's position + NodeRef operator[] (id_type i); + //! find a root child by index: return the root node's @p i-th child as a NodeRef + //! @note @p i is NOT the node id, but the child's position + ConstNodeRef operator[] (id_type i) const; + + /** @} */ + +public: + + /** @name node property getters */ + /** @{ */ + + NodeType type(id_type node) const { return _p(node)->m_type; } + const char* type_str(id_type node) const { return NodeType::type_str(_p(node)->m_type); } + + csubstr const& key (id_type node) const { _RYML_CB_ASSERT(m_callbacks, has_key(node)); return _p(node)->m_key.scalar; } + csubstr const& key_tag (id_type node) const { _RYML_CB_ASSERT(m_callbacks, has_key_tag(node)); return _p(node)->m_key.tag; } + csubstr const& key_ref (id_type node) const { _RYML_CB_ASSERT(m_callbacks, is_key_ref(node)); return _p(node)->m_key.anchor; } + csubstr const& key_anchor(id_type node) const { _RYML_CB_ASSERT(m_callbacks, has_key_anchor(node)); return _p(node)->m_key.anchor; } + NodeScalar const& keysc (id_type node) const { _RYML_CB_ASSERT(m_callbacks, has_key(node)); return _p(node)->m_key; } + + csubstr const& val (id_type node) const { _RYML_CB_ASSERT(m_callbacks, has_val(node)); return _p(node)->m_val.scalar; } + csubstr const& val_tag (id_type node) const { _RYML_CB_ASSERT(m_callbacks, has_val_tag(node)); return _p(node)->m_val.tag; } + csubstr const& val_ref (id_type node) const { _RYML_CB_ASSERT(m_callbacks, is_val_ref(node)); return _p(node)->m_val.anchor; } + csubstr const& val_anchor(id_type node) const { _RYML_CB_ASSERT(m_callbacks, has_val_anchor(node)); return _p(node)->m_val.anchor; } + NodeScalar const& valsc (id_type node) const { _RYML_CB_ASSERT(m_callbacks, has_val(node)); return _p(node)->m_val; } + + /** @} */ + +public: + + /** @name node type predicates */ + /** @{ */ + + C4_ALWAYS_INLINE bool type_has_any(id_type node, NodeType_e bits) const { return _p(node)->m_type.has_any(bits); } + C4_ALWAYS_INLINE 
bool type_has_all(id_type node, NodeType_e bits) const { return _p(node)->m_type.has_all(bits); } + C4_ALWAYS_INLINE bool type_has_none(id_type node, NodeType_e bits) const { return _p(node)->m_type.has_none(bits); } + + C4_ALWAYS_INLINE bool is_stream(id_type node) const { return _p(node)->m_type.is_stream(); } + C4_ALWAYS_INLINE bool is_doc(id_type node) const { return _p(node)->m_type.is_doc(); } + C4_ALWAYS_INLINE bool is_container(id_type node) const { return _p(node)->m_type.is_container(); } + C4_ALWAYS_INLINE bool is_map(id_type node) const { return _p(node)->m_type.is_map(); } + C4_ALWAYS_INLINE bool is_seq(id_type node) const { return _p(node)->m_type.is_seq(); } + C4_ALWAYS_INLINE bool has_key(id_type node) const { return _p(node)->m_type.has_key(); } + C4_ALWAYS_INLINE bool has_val(id_type node) const { return _p(node)->m_type.has_val(); } + C4_ALWAYS_INLINE bool is_val(id_type node) const { return _p(node)->m_type.is_val(); } + C4_ALWAYS_INLINE bool is_keyval(id_type node) const { return _p(node)->m_type.is_keyval(); } + C4_ALWAYS_INLINE bool has_key_tag(id_type node) const { return _p(node)->m_type.has_key_tag(); } + C4_ALWAYS_INLINE bool has_val_tag(id_type node) const { return _p(node)->m_type.has_val_tag(); } + C4_ALWAYS_INLINE bool has_key_anchor(id_type node) const { return _p(node)->m_type.has_key_anchor(); } + C4_ALWAYS_INLINE bool has_val_anchor(id_type node) const { return _p(node)->m_type.has_val_anchor(); } + C4_ALWAYS_INLINE bool has_anchor(id_type node) const { return _p(node)->m_type.has_anchor(); } + C4_ALWAYS_INLINE bool is_key_ref(id_type node) const { return _p(node)->m_type.is_key_ref(); } + C4_ALWAYS_INLINE bool is_val_ref(id_type node) const { return _p(node)->m_type.is_val_ref(); } + C4_ALWAYS_INLINE bool is_ref(id_type node) const { return _p(node)->m_type.is_ref(); } + + C4_ALWAYS_INLINE bool parent_is_seq(id_type node) const { _RYML_CB_ASSERT(m_callbacks, has_parent(node)); return is_seq(_p(node)->m_parent); } + 
C4_ALWAYS_INLINE bool parent_is_map(id_type node) const { _RYML_CB_ASSERT(m_callbacks, has_parent(node)); return is_map(_p(node)->m_parent); } + + /** true when the node has an anchor named a */ + C4_ALWAYS_INLINE bool has_anchor(id_type node, csubstr a) const { return _p(node)->m_key.anchor == a || _p(node)->m_val.anchor == a; } + + /** true if the node key does not have any KEYQUO flags, and its scalar verifies scalar_is_null(). + * @warning the node must verify .has_key() (asserted) (ie must be a member of a map) + * @see https://github.com/biojppm/rapidyaml/issues/413 */ + C4_ALWAYS_INLINE bool key_is_null(id_type node) const { _RYML_CB_ASSERT(m_callbacks, has_key(node)); NodeData const* C4_RESTRICT n = _p(node); return !n->m_type.is_key_quoted() && scalar_is_null(n->m_key.scalar); } + /** true if the node key does not have any VALQUO flags, and its scalar verifies scalar_is_null(). + * @warning the node must verify .has_val() (asserted) (ie must be a scalar / must not be a container) + * @see https://github.com/biojppm/rapidyaml/issues/413 */ + C4_ALWAYS_INLINE bool val_is_null(id_type node) const { _RYML_CB_ASSERT(m_callbacks, has_val(node)); NodeData const* C4_RESTRICT n = _p(node); return !n->m_type.is_val_quoted() && scalar_is_null(n->m_val.scalar); } + + /// true if the key was a scalar requiring filtering and was left + /// unfiltered during the parsing (see ParserOptions) + C4_ALWAYS_INLINE bool is_key_unfiltered(id_type node) const { return _p(node)->m_type.is_key_unfiltered(); } + /// true if the val was a scalar requiring filtering and was left + /// unfiltered during the parsing (see ParserOptions) + C4_ALWAYS_INLINE bool is_val_unfiltered(id_type node) const { return _p(node)->m_type.is_val_unfiltered(); } + + RYML_DEPRECATED("use has_key_anchor()") bool is_key_anchor(id_type node) const { return _p(node)->m_type.has_key_anchor(); } + RYML_DEPRECATED("use has_val_anchor()") bool is_val_anchor(id_type node) const { return 
_p(node)->m_type.has_val_anchor(); } + RYML_DEPRECATED("use has_anchor()") bool is_anchor(id_type node) const { return _p(node)->m_type.has_anchor(); } + RYML_DEPRECATED("use has_anchor_or_ref()") bool is_anchor_or_ref(id_type node) const { return _p(node)->m_type.has_anchor() || _p(node)->m_type.is_ref(); } + + /** @} */ + +public: + + /** @name hierarchy predicates */ + /** @{ */ + + bool is_root(id_type node) const { _RYML_CB_ASSERT(m_callbacks, _p(node)->m_parent != NONE || node == 0); return _p(node)->m_parent == NONE; } + + bool has_parent(id_type node) const { return _p(node)->m_parent != NONE; } + + /** true when key and val are empty, and has no children */ + bool empty(id_type node) const { return ! has_children(node) && _p(node)->m_key.empty() && (( ! (_p(node)->m_type & VAL)) || _p(node)->m_val.empty()); } + + /** true if @p node has a child with id @p ch */ + bool has_child(id_type node, id_type ch) const { return _p(ch)->m_parent == node; } + /** true if @p node has a child with key @p key */ + bool has_child(id_type node, csubstr key) const { return find_child(node, key) != NONE; } + /** true if @p node has any children key */ + bool has_children(id_type node) const { return _p(node)->m_first_child != NONE; } + + /** true if @p node has a sibling with id @p sib */ + bool has_sibling(id_type node, id_type sib) const { return _p(node)->m_parent == _p(sib)->m_parent; } + /** true if one of the node's siblings has the given key */ + bool has_sibling(id_type node, csubstr key) const { return find_sibling(node, key) != NONE; } + /** true if node is not a single child */ + bool has_other_siblings(id_type node) const + { + NodeData const *n = _p(node); + if(C4_LIKELY(n->m_parent != NONE)) + { + n = _p(n->m_parent); + return n->m_first_child != n->m_last_child; + } + return false; + } + + RYML_DEPRECATED("use has_other_siblings()") bool has_siblings(id_type /*node*/) const { return true; } + + /** @} */ + +public: + + /** @name hierarchy getters */ + /** @{ 
*/ + + id_type parent(id_type node) const { return _p(node)->m_parent; } + + id_type prev_sibling(id_type node) const { return _p(node)->m_prev_sibling; } + id_type next_sibling(id_type node) const { return _p(node)->m_next_sibling; } + + /** O(#num_children) */ + id_type num_children(id_type node) const; + id_type child_pos(id_type node, id_type ch) const; + id_type first_child(id_type node) const { return _p(node)->m_first_child; } + id_type last_child(id_type node) const { return _p(node)->m_last_child; } + id_type child(id_type node, id_type pos) const; + id_type find_child(id_type node, csubstr const& key) const; + + /** O(#num_siblings) */ + /** counts with this */ + id_type num_siblings(id_type node) const { return is_root(node) ? 1 : num_children(_p(node)->m_parent); } + /** does not count with this */ + id_type num_other_siblings(id_type node) const { id_type ns = num_siblings(node); _RYML_CB_ASSERT(m_callbacks, ns > 0); return ns-1; } + id_type sibling_pos(id_type node, id_type sib) const { _RYML_CB_ASSERT(m_callbacks, ! is_root(node) || node == root_id()); return child_pos(_p(node)->m_parent, sib); } + id_type first_sibling(id_type node) const { return is_root(node) ? node : _p(_p(node)->m_parent)->m_first_child; } + id_type last_sibling(id_type node) const { return is_root(node) ? node : _p(_p(node)->m_parent)->m_last_child; } + id_type sibling(id_type node, id_type pos) const { return child(_p(node)->m_parent, pos); } + id_type find_sibling(id_type node, csubstr const& key) const { return find_child(_p(node)->m_parent, key); } + + id_type doc(id_type i) const { id_type rid = root_id(); _RYML_CB_ASSERT(m_callbacks, is_stream(rid)); return child(rid, i); } //!< gets the @p i document node index. requires that the root node is a stream. 
+ + id_type depth_asc(id_type node) const; /**< O(log(num_tree_nodes)) get the ascending depth of the node: number of levels between root and node */ + id_type depth_desc(id_type node) const; /**< O(num_tree_nodes) get the descending depth of the node: number of levels between node and deepest child */ + + /** @} */ + +public: + + /** @name node style predicates and modifiers. see the corresponding predicate in NodeType */ + /** @{ */ + + C4_ALWAYS_INLINE bool is_container_styled(id_type node) const { return _p(node)->m_type.is_container_styled(); } + C4_ALWAYS_INLINE bool is_block(id_type node) const { return _p(node)->m_type.is_block(); } + C4_ALWAYS_INLINE bool is_flow_sl(id_type node) const { return _p(node)->m_type.is_flow_sl(); } + C4_ALWAYS_INLINE bool is_flow_ml(id_type node) const { return _p(node)->m_type.is_flow_ml(); } + C4_ALWAYS_INLINE bool is_flow(id_type node) const { return _p(node)->m_type.is_flow(); } + + C4_ALWAYS_INLINE bool is_key_styled(id_type node) const { return _p(node)->m_type.is_key_styled(); } + C4_ALWAYS_INLINE bool is_val_styled(id_type node) const { return _p(node)->m_type.is_val_styled(); } + C4_ALWAYS_INLINE bool is_key_literal(id_type node) const { return _p(node)->m_type.is_key_literal(); } + C4_ALWAYS_INLINE bool is_val_literal(id_type node) const { return _p(node)->m_type.is_val_literal(); } + C4_ALWAYS_INLINE bool is_key_folded(id_type node) const { return _p(node)->m_type.is_key_folded(); } + C4_ALWAYS_INLINE bool is_val_folded(id_type node) const { return _p(node)->m_type.is_val_folded(); } + C4_ALWAYS_INLINE bool is_key_squo(id_type node) const { return _p(node)->m_type.is_key_squo(); } + C4_ALWAYS_INLINE bool is_val_squo(id_type node) const { return _p(node)->m_type.is_val_squo(); } + C4_ALWAYS_INLINE bool is_key_dquo(id_type node) const { return _p(node)->m_type.is_key_dquo(); } + C4_ALWAYS_INLINE bool is_val_dquo(id_type node) const { return _p(node)->m_type.is_val_dquo(); } + C4_ALWAYS_INLINE bool is_key_plain(id_type 
node) const { return _p(node)->m_type.is_key_plain(); } + C4_ALWAYS_INLINE bool is_val_plain(id_type node) const { return _p(node)->m_type.is_val_plain(); } + C4_ALWAYS_INLINE bool is_key_quoted(id_type node) const { return _p(node)->m_type.is_key_quoted(); } + C4_ALWAYS_INLINE bool is_val_quoted(id_type node) const { return _p(node)->m_type.is_val_quoted(); } + C4_ALWAYS_INLINE bool is_quoted(id_type node) const { return _p(node)->m_type.is_quoted(); } + + C4_ALWAYS_INLINE void set_container_style(id_type node, NodeType_e style) { _RYML_CB_ASSERT(m_callbacks, is_container(node)); _p(node)->m_type.set_container_style(style); } + C4_ALWAYS_INLINE void set_key_style(id_type node, NodeType_e style) { _RYML_CB_ASSERT(m_callbacks, has_key(node)); _p(node)->m_type.set_key_style(style); } + C4_ALWAYS_INLINE void set_val_style(id_type node, NodeType_e style) { _RYML_CB_ASSERT(m_callbacks, has_val(node)); _p(node)->m_type.set_val_style(style); } + + /** @} */ + +public: + + /** @name node type modifiers */ + /** @{ */ + + void to_keyval(id_type node, csubstr key, csubstr val, type_bits more_flags=0); + void to_map(id_type node, csubstr key, type_bits more_flags=0); + void to_seq(id_type node, csubstr key, type_bits more_flags=0); + void to_val(id_type node, csubstr val, type_bits more_flags=0); + void to_map(id_type node, type_bits more_flags=0); + void to_seq(id_type node, type_bits more_flags=0); + void to_doc(id_type node, type_bits more_flags=0); + void to_stream(id_type node, type_bits more_flags=0); + + void set_key(id_type node, csubstr key) { _RYML_CB_ASSERT(m_callbacks, has_key(node)); _p(node)->m_key.scalar = key; } + void set_val(id_type node, csubstr val) { _RYML_CB_ASSERT(m_callbacks, has_val(node)); _p(node)->m_val.scalar = val; } + + void set_key_tag(id_type node, csubstr tag) { _RYML_CB_ASSERT(m_callbacks, has_key(node)); _p(node)->m_key.tag = tag; _add_flags(node, KEYTAG); } + void set_val_tag(id_type node, csubstr tag) { _RYML_CB_ASSERT(m_callbacks, 
has_val(node) || is_container(node)); _p(node)->m_val.tag = tag; _add_flags(node, VALTAG); } + + void set_key_anchor(id_type node, csubstr anchor) { _RYML_CB_ASSERT(m_callbacks, ! is_key_ref(node)); _p(node)->m_key.anchor = anchor.triml('&'); _add_flags(node, KEYANCH); } + void set_val_anchor(id_type node, csubstr anchor) { _RYML_CB_ASSERT(m_callbacks, ! is_val_ref(node)); _p(node)->m_val.anchor = anchor.triml('&'); _add_flags(node, VALANCH); } + void set_key_ref (id_type node, csubstr ref ) { _RYML_CB_ASSERT(m_callbacks, ! has_key_anchor(node)); NodeData* C4_RESTRICT n = _p(node); n->m_key.set_ref_maybe_replacing_scalar(ref, n->m_type.has_key()); _add_flags(node, KEY|KEYREF); } + void set_val_ref (id_type node, csubstr ref ) { _RYML_CB_ASSERT(m_callbacks, ! has_val_anchor(node)); NodeData* C4_RESTRICT n = _p(node); n->m_val.set_ref_maybe_replacing_scalar(ref, n->m_type.has_val()); _add_flags(node, VAL|VALREF); } + + void rem_key_anchor(id_type node) { _p(node)->m_key.anchor.clear(); _rem_flags(node, KEYANCH); } + void rem_val_anchor(id_type node) { _p(node)->m_val.anchor.clear(); _rem_flags(node, VALANCH); } + void rem_key_ref (id_type node) { _p(node)->m_key.anchor.clear(); _rem_flags(node, KEYREF); } + void rem_val_ref (id_type node) { _p(node)->m_val.anchor.clear(); _rem_flags(node, VALREF); } + void rem_anchor_ref(id_type node) { _p(node)->m_key.anchor.clear(); _p(node)->m_val.anchor.clear(); _rem_flags(node, KEYANCH|VALANCH|KEYREF|VALREF); } + + /** @} */ + +public: + + /** @name tree modifiers */ + /** @{ */ + + /** reorder the tree in memory so that all the nodes are stored + * in a linear sequence when visited in depth-first order. + * This will invalidate existing ids, since the node id is its + * position in the tree's node array. */ + void reorder(); + + /** Resolve references (aliases <- anchors) in the tree. 
+ * + * Dereferencing is opt-in; after parsing, Tree::resolve() has to + * be called explicitly for obtaining resolved references in the + * tree. This method will @ref ReferenceResolver::resolve() + * to resolve all references and substitute the anchored values in + * place of the reference. + * + * This method first does a full traversal of the tree to gather all + * anchors and references in a separate collection, then it goes through + * that collection to locate the names, which it does by obeying the YAML + * standard diktat that "an alias node refers to the most recent node in + * the serialization having the specified anchor" + * + * So, depending on the number of anchor/alias nodes, this is a + * potentially expensive operation, with a best-case linear complexity + * (from the initial traversal). This potential cost is the reason for + * requiring an explicit call. + * + * @see ReferenceResolver::resolve() + */ + void resolve(ReferenceResolver *C4_RESTRICT rr); + + /** Resolve references using a throw-away resolver. */ + void resolve(); + + /** @} */ + +public: + + /** @name tag directives */ + /** @{ */ + + void resolve_tags(); + void normalize_tags(); + void normalize_tags_long(); + + id_type num_tag_directives() const; + bool add_tag_directive(csubstr directive); + id_type add_tag_directive(TagDirective const& td); + void clear_tag_directives(); + + /** resolve the given tag, appearing at node_id. Write the result into output. + * @return the number of characters required for the resolved tag */ + size_t resolve_tag(substr output, csubstr tag, id_type node_id) const; + csubstr resolve_tag_sub(substr output, csubstr tag, id_type node_id) const + { + size_t needed = resolve_tag(output, tag, node_id); + return needed <= output.len ? 
output.first(needed) : output; + } + + TagDirective const* begin_tag_directives() const { return m_tag_directives; } + TagDirective const* end_tag_directives() const { return m_tag_directives + num_tag_directives(); } + c4::yml::TagDirectiveRange tag_directives() const { return c4::yml::TagDirectiveRange{begin_tag_directives(), end_tag_directives()}; } + + RYML_DEPRECATED("use c4::yml::tag_directive_const_iterator") typedef TagDirective const* tag_directive_const_iterator; + RYML_DEPRECATED("use c4::yml::TagDirectiveRange") typedef c4::yml::TagDirectiveRange TagDirectiveProxy; + + /** @} */ + +public: + + /** @name modifying hierarchy */ + /** @{ */ + + /** create and insert a new child of @p parent. insert after the (to-be) + * sibling @p after, which must be a child of @p parent. To insert as the + * first child, set after to NONE */ + C4_ALWAYS_INLINE id_type insert_child(id_type parent, id_type after) + { + _RYML_CB_ASSERT(m_callbacks, parent != NONE); + _RYML_CB_ASSERT(m_callbacks, is_container(parent) || is_root(parent)); + _RYML_CB_ASSERT(m_callbacks, after == NONE || (_p(after)->m_parent == parent)); + id_type child = _claim(); + _set_hierarchy(child, parent, after); + return child; + } + /** create and insert a node as the first child of @p parent */ + C4_ALWAYS_INLINE id_type prepend_child(id_type parent) { return insert_child(parent, NONE); } + /** create and insert a node as the last child of @p parent */ + C4_ALWAYS_INLINE id_type append_child(id_type parent) { return insert_child(parent, _p(parent)->m_last_child); } + C4_ALWAYS_INLINE id_type _append_child__unprotected(id_type parent) + { + id_type child = _claim(); + _set_hierarchy(child, parent, _p(parent)->m_last_child); + return child; + } + +public: + + #if defined(__clang__) + # pragma clang diagnostic push + # pragma clang diagnostic ignored "-Wnull-dereference" + #elif defined(__GNUC__) + # pragma GCC diagnostic push + # if __GNUC__ >= 6 + # pragma GCC diagnostic ignored "-Wnull-dereference" + 
# endif + #endif + + //! create and insert a new sibling of n. insert after "after" + C4_ALWAYS_INLINE id_type insert_sibling(id_type node, id_type after) + { + return insert_child(_p(node)->m_parent, after); + } + /** create and insert a node as the first node of @p parent */ + C4_ALWAYS_INLINE id_type prepend_sibling(id_type node) { return prepend_child(_p(node)->m_parent); } + C4_ALWAYS_INLINE id_type append_sibling(id_type node) { return append_child(_p(node)->m_parent); } + +public: + + /** remove an entire branch at once: ie remove the children and the node itself */ + inline void remove(id_type node) { remove_children(node); _release(node); } /** remove all the node's children, but keep the node itself */ - void remove_children(size_t node); + void remove_children(id_type node); /** change the @p type of the node to one of MAP, SEQ or VAL. @p * type must have one and only one of MAP,SEQ,VAL; @p type may @@ -19684,9 +22019,9 @@ class RYML_EXPORT Tree * initialize with a null scalar (~), changing to MAP will * initialize with an empty map ({}), and changing to SEQ will * initialize with an empty seq ([]). 
*/ - bool change_type(size_t node, NodeType type); + bool change_type(id_type node, NodeType type); - bool change_type(size_t node, type_bits type) + bool change_type(id_type node, type_bits type) { return change_type(node, (NodeType)type); } @@ -19700,14 +22035,14 @@ class RYML_EXPORT Tree public: /** change the node's position in the parent */ - void move(size_t node, size_t after); + void move(id_type node, id_type after); /** change the node's parent and position */ - void move(size_t node, size_t new_parent, size_t after); + void move(id_type node, id_type new_parent, id_type after); /** change the node's parent and position to a different tree * @return the index of the new node in the destination tree */ - size_t move(Tree * src, size_t node, size_t new_parent, size_t after); + id_type move(Tree * src, id_type node, id_type new_parent, id_type after); /** ensure the first node is a stream. Eg, change this tree * @@ -19737,34 +22072,34 @@ class RYML_EXPORT Tree /** recursively duplicate a node from this tree into a new parent, * placing it after one of its children * @return the index of the copy */ - size_t duplicate(size_t node, size_t new_parent, size_t after); + id_type duplicate(id_type node, id_type new_parent, id_type after); /** recursively duplicate a node from a different tree into a new parent, * placing it after one of its children * @return the index of the copy */ - size_t duplicate(Tree const* src, size_t node, size_t new_parent, size_t after); + id_type duplicate(Tree const* src, id_type node, id_type new_parent, id_type after); /** recursively duplicate the node's children (but not the node) * @return the index of the last duplicated child */ - size_t duplicate_children(size_t node, size_t parent, size_t after); + id_type duplicate_children(id_type node, id_type parent, id_type after); /** recursively duplicate the node's children (but not the node), where * the node is from a different tree * @return the index of the last duplicated child */ 
- size_t duplicate_children(Tree const* src, size_t node, size_t parent, size_t after); + id_type duplicate_children(Tree const* src, id_type node, id_type parent, id_type after); - void duplicate_contents(size_t node, size_t where); - void duplicate_contents(Tree const* src, size_t node, size_t where); + void duplicate_contents(id_type node, id_type where); + void duplicate_contents(Tree const* src, id_type node, id_type where); /** duplicate the node's children (but not the node) in a new parent, but * omit repetitions where a duplicated node has the same key (in maps) or * value (in seqs). If one of the duplicated children has the same key * (in maps) or value (in seqs) as one of the parent's children, the one * that is placed closest to the end will prevail. */ - size_t duplicate_children_no_rep(size_t node, size_t parent, size_t after); - size_t duplicate_children_no_rep(Tree const* src, size_t node, size_t parent, size_t after); + id_type duplicate_children_no_rep(id_type node, id_type parent, id_type after); + id_type duplicate_children_no_rep(Tree const* src, id_type node, id_type parent, id_type after); public: - void merge_with(Tree const* src, size_t src_node=NONE, size_t dst_root=NONE); + void merge_with(Tree const* src, id_type src_node=NONE, id_type dst_root=NONE); /** @} */ @@ -19780,10 +22115,12 @@ class RYML_EXPORT Tree /** get the current capacity of the tree's internal arena */ inline size_t arena_capacity() const { return m_arena.len; } /** get the current slack of the tree's internal arena */ - inline size_t arena_slack() const { RYML_ASSERT(m_arena.len >= m_arena_pos); return m_arena.len - m_arena_pos; } + inline size_t arena_slack() const { _RYML_CB_ASSERT(m_callbacks, m_arena.len >= m_arena_pos); return m_arena.len - m_arena_pos; } /** get the current arena */ - substr arena() const { return m_arena.first(m_arena_pos); } + csubstr arena() const { return m_arena.first(m_arena_pos); } + /** get the current arena */ + substr arena() { return 
m_arena.first(m_arena_pos); } /** return true if the given substring is part of the tree's string arena */ bool in_arena(csubstr s) const @@ -19811,7 +22148,7 @@ class RYML_EXPORT Tree { rem = _grow_arena(num); num = to_chars_float(rem, a); - RYML_ASSERT(num <= rem.len); + _RYML_CB_ASSERT(m_callbacks, num <= rem.len); } rem = _request_span(num); return rem; @@ -19837,7 +22174,7 @@ class RYML_EXPORT Tree { rem = _grow_arena(num); num = to_chars(rem, a); - RYML_ASSERT(num <= rem.len); + _RYML_CB_ASSERT(m_callbacks, num <= rem.len); } rem = _request_span(num); return rem; @@ -19863,7 +22200,7 @@ class RYML_EXPORT Tree { rem = _grow_arena(num); num = to_chars(rem, a); - RYML_ASSERT(num <= rem.len); + _RYML_CB_ASSERT(m_callbacks, num <= rem.len); } return _request_span(num); } @@ -19906,8 +22243,8 @@ class RYML_EXPORT Tree substr copy_to_arena(csubstr s) { substr cp = alloc_arena(s.len); - RYML_ASSERT(cp.len == s.len); - RYML_ASSERT(!s.overlaps(cp)); + _RYML_CB_ASSERT(m_callbacks, cp.len == s.len); + _RYML_CB_ASSERT(m_callbacks, !s.overlaps(cp)); #if (!defined(__clang__)) && (defined(__GNUC__) && __GNUC__ >= 10) C4_SUPPRESS_WARNING_GCC_PUSH C4_SUPPRESS_WARNING_GCC("-Wstringop-overflow=") // no need for terminating \0 @@ -19940,8 +22277,8 @@ class RYML_EXPORT Tree } /** ensure the tree's internal string arena is at least the given capacity - * @note This operation has a potential complexity of O(numNodes)+O(arenasize). - * Growing the arena may cause relocation of the entire + * @warning This operation may be expensive, with a potential complexity of O(numNodes)+O(arenasize). + * @warning Growing the arena may cause relocation of the entire * existing arena, and thus change the contents of individual nodes. 
*/ void reserve_arena(size_t arena_cap) { @@ -19952,7 +22289,7 @@ class RYML_EXPORT Tree buf.len = arena_cap; if(m_arena.str) { - RYML_ASSERT(m_arena.len >= 0); + _RYML_CB_ASSERT(m_callbacks, m_arena.len >= 0); _relocate(buf); // does a memcpy and changes nodes using the arena m_callbacks.m_free(m_arena.str, m_arena.len, m_callbacks.m_user_data); } @@ -19975,6 +22312,7 @@ class RYML_EXPORT Tree substr _request_span(size_t sz) { + _RYML_CB_ASSERT(m_callbacks, m_arena_pos + sz <= m_arena.len); substr s; s = m_arena.sub(m_arena_pos, sz); m_arena_pos += sz; @@ -19983,12 +22321,12 @@ class RYML_EXPORT Tree substr _relocated(csubstr s, substr next_arena) const { - RYML_ASSERT(m_arena.is_super(s)); - RYML_ASSERT(m_arena.sub(0, m_arena_pos).is_super(s)); - auto pos = (s.str - m_arena.str); + _RYML_CB_ASSERT(m_callbacks, m_arena.is_super(s)); + _RYML_CB_ASSERT(m_callbacks, m_arena.sub(0, m_arena_pos).is_super(s)); + auto pos = (s.str - m_arena.str); // this is larger than 0 based on the assertions above substr r(next_arena.str + pos, s.len); - RYML_ASSERT(r.str - next_arena.str == pos); - RYML_ASSERT(next_arena.sub(0, m_arena_pos).is_super(r)); + _RYML_CB_ASSERT(m_callbacks, r.str - next_arena.str == pos); + _RYML_CB_ASSERT(m_callbacks, next_arena.sub(0, m_arena_pos).is_super(r)); return r; } @@ -19999,15 +22337,15 @@ class RYML_EXPORT Tree struct lookup_result { - size_t target; - size_t closest; + id_type target; + id_type closest; size_t path_pos; csubstr path; inline operator bool() const { return target != NONE; } lookup_result() : target(NONE), closest(NONE), path_pos(0), path() {} - lookup_result(csubstr path_, size_t start) : target(NONE), closest(start), path_pos(0), path(path_) {} + lookup_result(csubstr path_, id_type start) : target(NONE), closest(start), path_pos(0), path(path_) {} /** get the part ot the input path that was resolved */ csubstr resolved() const; @@ -20016,19 +22354,19 @@ class RYML_EXPORT Tree }; /** for example foo.bar[0].baz */ - 
lookup_result lookup_path(csubstr path, size_t start=NONE) const; + lookup_result lookup_path(csubstr path, id_type start=NONE) const; /** defaulted lookup: lookup @p path; if the lookup fails, recursively modify * the tree so that the corresponding lookup_path() would return the * default value. * @see lookup_path() */ - size_t lookup_path_or_modify(csubstr default_value, csubstr path, size_t start=NONE); + id_type lookup_path_or_modify(csubstr default_value, csubstr path, id_type start=NONE); /** defaulted lookup: lookup @p path; if the lookup fails, recursively modify * the tree so that the corresponding lookup_path() would return the * branch @p src_node (from the tree @p src). * @see lookup_path() */ - size_t lookup_path_or_modify(Tree const *src, size_t src_node, csubstr path, size_t start=NONE); + id_type lookup_path_or_modify(Tree const *src, id_type src_node, csubstr path, id_type start=NONE); /** @} */ @@ -20044,13 +22382,13 @@ class RYML_EXPORT Tree bool is_index() const { return value.begins_with('[') && value.ends_with(']'); } }; - size_t _lookup_path_or_create(csubstr path, size_t start); + id_type _lookup_path_or_create(csubstr path, id_type start); void _lookup_path (lookup_result *r) const; void _lookup_path_modify(lookup_result *r); - size_t _next_node (lookup_result *r, _lookup_path_token *parent) const; - size_t _next_node_modify(lookup_result *r, _lookup_path_token *parent); + id_type _next_node (lookup_result *r, _lookup_path_token *parent) const; + id_type _next_node_modify(lookup_result *r, _lookup_path_token *parent); void _advance(lookup_result *r, size_t more) const; @@ -20061,16 +22399,18 @@ class RYML_EXPORT Tree void _clear(); void _free(); void _copy(Tree const& that); - void _move(Tree & that); + void _move(Tree & that) noexcept; void _relocate(substr next_arena); public: + /** @cond dev*/ + #if ! 
RYML_USE_ASSERT - C4_ALWAYS_INLINE void _check_next_flags(size_t, type_bits) {} + C4_ALWAYS_INLINE void _check_next_flags(id_type, type_bits) {} #else - void _check_next_flags(size_t node, type_bits f) + void _check_next_flags(id_type node, type_bits f) { auto n = _p(node); type_bits o = n->m_type; // old @@ -20091,58 +22431,58 @@ class RYML_EXPORT Tree } if(f & KEY) { - RYML_ASSERT(!is_root(node)); + _RYML_CB_ASSERT(m_callbacks, !is_root(node)); auto pid = parent(node); C4_UNUSED(pid); - RYML_ASSERT(is_map(pid)); + _RYML_CB_ASSERT(m_callbacks, is_map(pid)); } if((f & VAL) && !is_root(node)) { auto pid = parent(node); C4_UNUSED(pid); - RYML_ASSERT(is_map(pid) || is_seq(pid)); + _RYML_CB_ASSERT(m_callbacks, is_map(pid) || is_seq(pid)); } } #endif - inline void _set_flags(size_t node, NodeType_e f) { _check_next_flags(node, f); _p(node)->m_type = f; } - inline void _set_flags(size_t node, type_bits f) { _check_next_flags(node, f); _p(node)->m_type = f; } + inline void _set_flags(id_type node, NodeType_e f) { _check_next_flags(node, f); _p(node)->m_type = f; } + inline void _set_flags(id_type node, type_bits f) { _check_next_flags(node, f); _p(node)->m_type = f; } - inline void _add_flags(size_t node, NodeType_e f) { NodeData *d = _p(node); type_bits fb = f | d->m_type; _check_next_flags(node, fb); d->m_type = (NodeType_e) fb; } - inline void _add_flags(size_t node, type_bits f) { NodeData *d = _p(node); f |= d->m_type; _check_next_flags(node, f); d->m_type = f; } + inline void _add_flags(id_type node, NodeType_e f) { NodeData *d = _p(node); type_bits fb = f | d->m_type; _check_next_flags(node, fb); d->m_type = (NodeType_e) fb; } + inline void _add_flags(id_type node, type_bits f) { NodeData *d = _p(node); f |= d->m_type; _check_next_flags(node, f); d->m_type = f; } - inline void _rem_flags(size_t node, NodeType_e f) { NodeData *d = _p(node); type_bits fb = d->m_type & ~f; _check_next_flags(node, fb); d->m_type = (NodeType_e) fb; } - inline void _rem_flags(size_t 
node, type_bits f) { NodeData *d = _p(node); f = d->m_type & ~f; _check_next_flags(node, f); d->m_type = f; } + inline void _rem_flags(id_type node, NodeType_e f) { NodeData *d = _p(node); type_bits fb = d->m_type & ~f; _check_next_flags(node, fb); d->m_type = (NodeType_e) fb; } + inline void _rem_flags(id_type node, type_bits f) { NodeData *d = _p(node); f = d->m_type & ~f; _check_next_flags(node, f); d->m_type = f; } - void _set_key(size_t node, csubstr key, type_bits more_flags=0) + void _set_key(id_type node, csubstr key, type_bits more_flags=0) { _p(node)->m_key.scalar = key; _add_flags(node, KEY|more_flags); } - void _set_key(size_t node, NodeScalar const& key, type_bits more_flags=0) + void _set_key(id_type node, NodeScalar const& key, type_bits more_flags=0) { _p(node)->m_key = key; _add_flags(node, KEY|more_flags); } - void _set_val(size_t node, csubstr val, type_bits more_flags=0) + void _set_val(id_type node, csubstr val, type_bits more_flags=0) { - RYML_ASSERT(num_children(node) == 0); - RYML_ASSERT(!is_seq(node) && !is_map(node)); + _RYML_CB_ASSERT(m_callbacks, num_children(node) == 0); + _RYML_CB_ASSERT(m_callbacks, !is_seq(node) && !is_map(node)); _p(node)->m_val.scalar = val; _add_flags(node, VAL|more_flags); } - void _set_val(size_t node, NodeScalar const& val, type_bits more_flags=0) + void _set_val(id_type node, NodeScalar const& val, type_bits more_flags=0) { - RYML_ASSERT(num_children(node) == 0); - RYML_ASSERT( ! is_container(node)); + _RYML_CB_ASSERT(m_callbacks, num_children(node) == 0); + _RYML_CB_ASSERT(m_callbacks, ! 
is_container(node)); _p(node)->m_val = val; _add_flags(node, VAL|more_flags); } - void _set(size_t node, NodeInit const& i) + void _set(id_type node, NodeInit const& i) { - RYML_ASSERT(i._check()); + _RYML_CB_ASSERT(m_callbacks, i._check()); NodeData *n = _p(node); - RYML_ASSERT(n->m_key.scalar.empty() || i.key.scalar.empty() || i.key.scalar == n->m_key.scalar); + _RYML_CB_ASSERT(m_callbacks, n->m_key.scalar.empty() || i.key.scalar.empty() || i.key.scalar == n->m_key.scalar); _add_flags(node, i.type); if(n->m_key.scalar.empty()) { @@ -20155,10 +22495,10 @@ class RYML_EXPORT Tree n->m_val = i.val; } - void _set_parent_as_container_if_needed(size_t in) + void _set_parent_as_container_if_needed(id_type in) { NodeData const* n = _p(in); - size_t ip = parent(in); + id_type ip = parent(in); if(ip != NONE) { if( ! (is_seq(ip) || is_map(ip))) @@ -20178,10 +22518,10 @@ class RYML_EXPORT Tree } } - void _seq2map(size_t node) + void _seq2map(id_type node) { - RYML_ASSERT(is_seq(node)); - for(size_t i = first_child(node); i != NONE; i = next_sibling(i)) + _RYML_CB_ASSERT(m_callbacks, is_seq(node)); + for(id_type i = first_child(node); i != NONE; i = next_sibling(i)) { NodeData *C4_RESTRICT ch = _p(i); if(ch->m_type.is_keyval()) @@ -20194,24 +22534,24 @@ class RYML_EXPORT Tree n->m_type.add(MAP); } - size_t _do_reorder(size_t *node, size_t count); + id_type _do_reorder(id_type *node, id_type count); - void _swap(size_t n_, size_t m_); - void _swap_props(size_t n_, size_t m_); - void _swap_hierarchy(size_t n_, size_t m_); - void _copy_hierarchy(size_t dst_, size_t src_); + void _swap(id_type n_, id_type m_); + void _swap_props(id_type n_, id_type m_); + void _swap_hierarchy(id_type n_, id_type m_); + void _copy_hierarchy(id_type dst_, id_type src_); - inline void _copy_props(size_t dst_, size_t src_) + inline void _copy_props(id_type dst_, id_type src_) { _copy_props(dst_, this, src_); } - inline void _copy_props_wo_key(size_t dst_, size_t src_) + inline void 
_copy_props_wo_key(id_type dst_, id_type src_) { _copy_props_wo_key(dst_, this, src_); } - void _copy_props(size_t dst_, Tree const* that_tree, size_t src_) + void _copy_props(id_type dst_, Tree const* that_tree, id_type src_) { auto & C4_RESTRICT dst = *_p(dst_); auto const& C4_RESTRICT src = *that_tree->_p(src_); @@ -20220,7 +22560,16 @@ class RYML_EXPORT Tree dst.m_val = src.m_val; } - void _copy_props_wo_key(size_t dst_, Tree const* that_tree, size_t src_) + void _copy_props(id_type dst_, Tree const* that_tree, id_type src_, type_bits src_mask) + { + auto & C4_RESTRICT dst = *_p(dst_); + auto const& C4_RESTRICT src = *that_tree->_p(src_); + dst.m_type = (src.m_type & src_mask) | (dst.m_type & ~src_mask); + dst.m_key = src.m_key; + dst.m_val = src.m_val; + } + + void _copy_props_wo_key(id_type dst_, Tree const* that_tree, id_type src_) { auto & C4_RESTRICT dst = *_p(dst_); auto const& C4_RESTRICT src = *that_tree->_p(src_); @@ -20228,12 +22577,20 @@ class RYML_EXPORT Tree dst.m_val = src.m_val; } - inline void _clear_type(size_t node) + void _copy_props_wo_key(id_type dst_, Tree const* that_tree, id_type src_, type_bits src_mask) + { + auto & C4_RESTRICT dst = *_p(dst_); + auto const& C4_RESTRICT src = *that_tree->_p(src_); + dst.m_type = (src.m_type & ((~_KEYMASK)|src_mask)) | (dst.m_type & (_KEYMASK|~src_mask)); + dst.m_val = src.m_val; + } + + inline void _clear_type(id_type node) { _p(node)->m_type = NOTYPE; } - inline void _clear(size_t node) + inline void _clear(id_type node) { auto *C4_RESTRICT n = _p(node); n->m_type = NOTYPE; @@ -20244,42 +22601,46 @@ class RYML_EXPORT Tree n->m_last_child = NONE; } - inline void _clear_key(size_t node) + inline void _clear_key(id_type node) { _p(node)->m_key.clear(); _rem_flags(node, KEY); } - inline void _clear_val(size_t node) + inline void _clear_val(id_type node) { _p(node)->m_val.clear(); _rem_flags(node, VAL); } + /** @endcond */ + private: - void _clear_range(size_t first, size_t num); + void 
_clear_range(id_type first, id_type num); - size_t _claim(); +public: + id_type _claim(); +private: void _claim_root(); - void _release(size_t node); - void _free_list_add(size_t node); - void _free_list_rem(size_t node); + void _release(id_type node); + void _free_list_add(id_type node); + void _free_list_rem(id_type node); - void _set_hierarchy(size_t node, size_t parent, size_t after_sibling); - void _rem_hierarchy(size_t node); + void _set_hierarchy(id_type node, id_type parent, id_type after_sibling); + void _rem_hierarchy(id_type node); public: // members are exposed, but you should NOT access them directly - NodeData * m_buf; - size_t m_cap; + NodeData *m_buf; + id_type m_cap; - size_t m_size; + id_type m_size; - size_t m_free_head; - size_t m_free_tail; + id_type m_free_head; + id_type m_free_tail; substr m_arena; size_t m_arena_pos; @@ -20290,6 +22651,8 @@ class RYML_EXPORT Tree }; +/** @} */ + } // namespace yml } // namespace c4 @@ -20315,8 +22678,7 @@ C4_SUPPRESS_WARNING_GCC_CLANG_POP #ifndef _C4_YML_NODE_HPP_ #define _C4_YML_NODE_HPP_ -/** @file node.hpp - * @see NodeRef */ +/** @file node.hpp Node classes */ //included above: //#include @@ -20336,12 +22698,16 @@ C4_SUPPRESS_WARNING_GCC_CLANG_POP #endif /* C4_BASE64_HPP_ */ -#ifdef __GNUC__ +#ifdef __clang__ +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wtype-limits" +# pragma clang diagnostic ignored "-Wold-style-cast" +#elif defined(__GNUC__) # pragma GCC diagnostic push # pragma GCC diagnostic ignored "-Wtype-limits" -#endif - -#if defined(_MSC_VER) +# pragma GCC diagnostic ignored "-Wold-style-cast" +# pragma GCC diagnostic ignored "-Wuseless-cast" +#elif defined(_MSC_VER) # pragma warning(push) # pragma warning(disable: 4251/*needs to have dll-interface to be used by clients of struct*/) # pragma warning(disable: 4296/*expression is always 'boolean_value'*/) @@ -20350,6 +22716,16 @@ C4_SUPPRESS_WARNING_GCC_CLANG_POP namespace c4 { namespace yml { +/** @addtogroup 
doc_node_classes + * + * @{ + */ + + +/** @defgroup doc_serialization_helpers Serialization helpers + * + * @{ + */ template struct Key { K & k; }; template<> struct Key { fmt::const_base64_wrapper wrapper; }; template<> struct Key { fmt::base64_wrapper wrapper; }; @@ -20368,6 +22744,8 @@ template typename std::enable_if< std::is_floating_point::value, bool>::type read(NodeRef const& n, T *v); +/** @} */ + //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- @@ -20377,10 +22755,12 @@ read(NodeRef const& n, T *v); class NodeRef; class ConstNodeRef; + //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- +/** @cond dev */ namespace detail { template @@ -20390,9 +22770,9 @@ struct child_iterator using tree_type = typename NodeRefType::tree_type; tree_type * C4_RESTRICT m_tree; - size_t m_child_id; + id_type m_child_id; - child_iterator(tree_type * t, size_t id) : m_tree(t), m_child_id(id) {} + child_iterator(tree_type * t, id_type id) : m_tree(t), m_child_id(id) {} child_iterator& operator++ () { RYML_ASSERT(m_child_id != NONE); m_child_id = m_tree->next_sibling(m_child_id); return *this; } child_iterator& operator-- () { RYML_ASSERT(m_child_id != NONE); m_child_id = m_tree->prev_sibling(m_child_id); return *this; } @@ -20419,9 +22799,9 @@ struct children_view_ }; template -bool _visit(NodeRefType &node, Visitor fn, size_t indentation_level, bool skip_root=false) +bool _visit(NodeRefType &node, Visitor fn, id_type indentation_level, bool skip_root=false) { - size_t increment = 0; + id_type increment = 0; if( ! 
(node.is_root() && skip_root)) { if(fn(node, indentation_level)) @@ -20442,9 +22822,9 @@ bool _visit(NodeRefType &node, Visitor fn, size_t indentation_level, bool skip_r } template -bool _visit_stacked(NodeRefType &node, Visitor fn, size_t indentation_level, bool skip_root=false) +bool _visit_stacked(NodeRefType &node, Visitor fn, id_type indentation_level, bool skip_root=false) { - size_t increment = 0; + id_type increment = 0; if( ! (node.is_root() && skip_root)) { if(fn(node, indentation_level)) @@ -20469,24 +22849,38 @@ bool _visit_stacked(NodeRefType &node, Visitor fn, size_t indentation_level, boo return false; } +template +struct RoNodeMethods; +} // detail +/** @endcond */ //----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + -/** a CRTP base for read-only node methods */ +/** a CRTP base providing read-only methods for @ref ConstNodeRef and @ref NodeRef */ +namespace detail { template struct RoNodeMethods { C4_SUPPRESS_WARNING_GCC_CLANG_WITH_PUSH("-Wcast-align") + /** @cond dev */ // helper CRTP macros, undefined at the end #define tree_ ((ConstImpl const* C4_RESTRICT)this)->m_tree #define id_ ((ConstImpl const* C4_RESTRICT)this)->m_id #define tree__ ((Impl const* C4_RESTRICT)this)->m_tree #define id__ ((Impl const* C4_RESTRICT)this)->m_id - // require valid - #define _C4RV() \ + // require readable: this is a precondition for reading from the + // tree using this object. 
+ #define _C4RR() \ RYML_ASSERT(tree_ != nullptr); \ - _RYML_CB_ASSERT(tree_->m_callbacks, id_ != NONE) + _RYML_CB_ASSERT(tree_->m_callbacks, id_ != NONE); \ + _RYML_CB_ASSERT(tree_->m_callbacks, (((Impl const* C4_RESTRICT)this)->readable())) + // a SFINAE beautifier to enable a function only if the + // implementation is mutable #define _C4_IF_MUTABLE(ty) typename std::enable_if::value, ty>::type + /** @endcond */ public: @@ -20494,85 +22888,124 @@ struct RoNodeMethods /** @{ */ /** returns the data or null when the id is NONE */ - C4_ALWAYS_INLINE C4_PURE NodeData const* get() const noexcept { RYML_ASSERT(tree_ != nullptr); return tree_->get(id_); } + C4_ALWAYS_INLINE NodeData const* get() const RYML_NOEXCEPT { return ((Impl const*)this)->readable() ? tree_->get(id_) : nullptr; } /** returns the data or null when the id is NONE */ template - C4_ALWAYS_INLINE C4_PURE auto get() noexcept -> _C4_IF_MUTABLE(NodeData*) { RYML_ASSERT(tree_ != nullptr); return tree__->get(id__); } + C4_ALWAYS_INLINE auto get() RYML_NOEXCEPT -> _C4_IF_MUTABLE(NodeData*) { return ((Impl const*)this)->readable() ? tree__->get(id__) : nullptr; } + + C4_ALWAYS_INLINE NodeType type() const RYML_NOEXCEPT { _C4RR(); return tree_->type(id_); } /**< Forward to @ref Tree::type_str(). Node must be readable. */ + C4_ALWAYS_INLINE const char* type_str() const RYML_NOEXCEPT { _C4RR(); return tree_->type_str(id_); } /**< Forward to @ref Tree::type_str(). Node must be readable. */ - C4_ALWAYS_INLINE C4_PURE NodeType type() const noexcept { _C4RV(); return tree_->type(id_); } - C4_ALWAYS_INLINE C4_PURE const char* type_str() const noexcept { return tree_->type_str(id_); } + C4_ALWAYS_INLINE csubstr key() const RYML_NOEXCEPT { _C4RR(); return tree_->key(id_); } /**< Forward to @ref Tree::key(). Node must be readable. */ + C4_ALWAYS_INLINE csubstr key_tag() const RYML_NOEXCEPT { _C4RR(); return tree_->key_tag(id_); } /**< Forward to @ref Tree::key_tag(). Node must be readable. 
*/ + C4_ALWAYS_INLINE csubstr key_ref() const RYML_NOEXCEPT { _C4RR(); return tree_->key_ref(id_); } /**< Forward to @ref Tree::key_ref(). Node must be readable. */ + C4_ALWAYS_INLINE csubstr key_anchor() const RYML_NOEXCEPT { _C4RR(); return tree_->key_anchor(id_); } /**< Forward to @ref Tree::key_anchor(). Node must be readable. */ - C4_ALWAYS_INLINE C4_PURE csubstr key() const noexcept { _C4RV(); return tree_->key(id_); } - C4_ALWAYS_INLINE C4_PURE csubstr key_tag() const noexcept { _C4RV(); return tree_->key_tag(id_); } - C4_ALWAYS_INLINE C4_PURE csubstr key_ref() const noexcept { _C4RV(); return tree_->key_ref(id_); } - C4_ALWAYS_INLINE C4_PURE csubstr key_anchor() const noexcept { _C4RV(); return tree_->key_anchor(id_); } + C4_ALWAYS_INLINE csubstr val() const RYML_NOEXCEPT { _C4RR(); return tree_->val(id_); } /**< Forward to @ref Tree::val(). Node must be readable. */ + C4_ALWAYS_INLINE csubstr val_tag() const RYML_NOEXCEPT { _C4RR(); return tree_->val_tag(id_); } /**< Forward to @ref Tree::val_tag(). Node must be readable. */ + C4_ALWAYS_INLINE csubstr val_ref() const RYML_NOEXCEPT { _C4RR(); return tree_->val_ref(id_); } /**< Forward to @ref Tree::val_ref(). Node must be readable. */ + C4_ALWAYS_INLINE csubstr val_anchor() const RYML_NOEXCEPT { _C4RR(); return tree_->val_anchor(id_); } /**< Forward to @ref Tree::val_anchor(). Node must be readable. */ - C4_ALWAYS_INLINE C4_PURE csubstr val() const noexcept { _C4RV(); return tree_->val(id_); } - C4_ALWAYS_INLINE C4_PURE csubstr val_tag() const noexcept { _C4RV(); return tree_->val_tag(id_); } - C4_ALWAYS_INLINE C4_PURE csubstr val_ref() const noexcept { _C4RV(); return tree_->val_ref(id_); } - C4_ALWAYS_INLINE C4_PURE csubstr val_anchor() const noexcept { _C4RV(); return tree_->val_anchor(id_); } + C4_ALWAYS_INLINE NodeScalar const& keysc() const RYML_NOEXCEPT { _C4RR(); return tree_->keysc(id_); } /**< Forward to @ref Tree::keysc(). Node must be readable. 
*/ + C4_ALWAYS_INLINE NodeScalar const& valsc() const RYML_NOEXCEPT { _C4RR(); return tree_->valsc(id_); } /**< Forward to @ref Tree::valsc(). Node must be readable. */ - C4_ALWAYS_INLINE C4_PURE NodeScalar const& keysc() const noexcept { _C4RV(); return tree_->keysc(id_); } - C4_ALWAYS_INLINE C4_PURE NodeScalar const& valsc() const noexcept { _C4RV(); return tree_->valsc(id_); } + C4_ALWAYS_INLINE bool key_is_null() const RYML_NOEXCEPT { _C4RR(); return tree_->key_is_null(id_); } /**< Forward to @ref Tree::key_is_null(). Node must be readable. */ + C4_ALWAYS_INLINE bool val_is_null() const RYML_NOEXCEPT { _C4RR(); return tree_->val_is_null(id_); } /**< Forward to @ref Tree::val_is_null(). Node must be readable. */ - C4_ALWAYS_INLINE C4_PURE bool key_is_null() const noexcept { _C4RV(); return tree_->key_is_null(id_); } - C4_ALWAYS_INLINE C4_PURE bool val_is_null() const noexcept { _C4RV(); return tree_->val_is_null(id_); } + C4_ALWAYS_INLINE bool is_key_unfiltered() const noexcept { _C4RR(); return tree_->is_key_unfiltered(id_); } /**< Forward to @ref Tree::is_key_unfiltered(). Node must be readable. */ + C4_ALWAYS_INLINE bool is_val_unfiltered() const noexcept { _C4RR(); return tree_->is_val_unfiltered(id_); } /**< Forward to @ref Tree::is_val_unfiltered(). Node must be readable. 
*/ /** @} */ public: - /** @name node property predicates */ + /** @name node type predicates */ /** @{ */ - C4_ALWAYS_INLINE C4_PURE bool empty() const noexcept { _C4RV(); return tree_->empty(id_); } - C4_ALWAYS_INLINE C4_PURE bool is_stream() const noexcept { _C4RV(); return tree_->is_stream(id_); } - C4_ALWAYS_INLINE C4_PURE bool is_doc() const noexcept { _C4RV(); return tree_->is_doc(id_); } - C4_ALWAYS_INLINE C4_PURE bool is_container() const noexcept { _C4RV(); return tree_->is_container(id_); } - C4_ALWAYS_INLINE C4_PURE bool is_map() const noexcept { _C4RV(); return tree_->is_map(id_); } - C4_ALWAYS_INLINE C4_PURE bool is_seq() const noexcept { _C4RV(); return tree_->is_seq(id_); } - C4_ALWAYS_INLINE C4_PURE bool has_val() const noexcept { _C4RV(); return tree_->has_val(id_); } - C4_ALWAYS_INLINE C4_PURE bool has_key() const noexcept { _C4RV(); return tree_->has_key(id_); } - C4_ALWAYS_INLINE C4_PURE bool is_val() const noexcept { _C4RV(); return tree_->is_val(id_); } - C4_ALWAYS_INLINE C4_PURE bool is_keyval() const noexcept { _C4RV(); return tree_->is_keyval(id_); } - C4_ALWAYS_INLINE C4_PURE bool has_key_tag() const noexcept { _C4RV(); return tree_->has_key_tag(id_); } - C4_ALWAYS_INLINE C4_PURE bool has_val_tag() const noexcept { _C4RV(); return tree_->has_val_tag(id_); } - C4_ALWAYS_INLINE C4_PURE bool has_key_anchor() const noexcept { _C4RV(); return tree_->has_key_anchor(id_); } - C4_ALWAYS_INLINE C4_PURE bool is_key_anchor() const noexcept { _C4RV(); return tree_->is_key_anchor(id_); } - C4_ALWAYS_INLINE C4_PURE bool has_val_anchor() const noexcept { _C4RV(); return tree_->has_val_anchor(id_); } - C4_ALWAYS_INLINE C4_PURE bool is_val_anchor() const noexcept { _C4RV(); return tree_->is_val_anchor(id_); } - C4_ALWAYS_INLINE C4_PURE bool has_anchor() const noexcept { _C4RV(); return tree_->has_anchor(id_); } - C4_ALWAYS_INLINE C4_PURE bool is_anchor() const noexcept { _C4RV(); return tree_->is_anchor(id_); } - C4_ALWAYS_INLINE C4_PURE bool is_key_ref() 
const noexcept { _C4RV(); return tree_->is_key_ref(id_); } - C4_ALWAYS_INLINE C4_PURE bool is_val_ref() const noexcept { _C4RV(); return tree_->is_val_ref(id_); } - C4_ALWAYS_INLINE C4_PURE bool is_ref() const noexcept { _C4RV(); return tree_->is_ref(id_); } - C4_ALWAYS_INLINE C4_PURE bool is_anchor_or_ref() const noexcept { _C4RV(); return tree_->is_anchor_or_ref(id_); } - C4_ALWAYS_INLINE C4_PURE bool is_key_quoted() const noexcept { _C4RV(); return tree_->is_key_quoted(id_); } - C4_ALWAYS_INLINE C4_PURE bool is_val_quoted() const noexcept { _C4RV(); return tree_->is_val_quoted(id_); } - C4_ALWAYS_INLINE C4_PURE bool is_quoted() const noexcept { _C4RV(); return tree_->is_quoted(id_); } - C4_ALWAYS_INLINE C4_PURE bool parent_is_seq() const noexcept { _C4RV(); return tree_->parent_is_seq(id_); } - C4_ALWAYS_INLINE C4_PURE bool parent_is_map() const noexcept { _C4RV(); return tree_->parent_is_map(id_); } + C4_ALWAYS_INLINE bool empty() const RYML_NOEXCEPT { _C4RR(); return tree_->empty(id_); } /**< Forward to @ref Tree::empty(). Node must be readable. */ + C4_ALWAYS_INLINE bool is_stream() const RYML_NOEXCEPT { _C4RR(); return tree_->is_stream(id_); } /**< Forward to @ref Tree::is_stream(). Node must be readable. */ + C4_ALWAYS_INLINE bool is_doc() const RYML_NOEXCEPT { _C4RR(); return tree_->is_doc(id_); } /**< Forward to @ref Tree::is_doc(). Node must be readable. */ + C4_ALWAYS_INLINE bool is_container() const RYML_NOEXCEPT { _C4RR(); return tree_->is_container(id_); } /**< Forward to @ref Tree::is_container(). Node must be readable. */ + C4_ALWAYS_INLINE bool is_map() const RYML_NOEXCEPT { _C4RR(); return tree_->is_map(id_); } /**< Forward to @ref Tree::is_map(). Node must be readable. */ + C4_ALWAYS_INLINE bool is_seq() const RYML_NOEXCEPT { _C4RR(); return tree_->is_seq(id_); } /**< Forward to @ref Tree::is_seq(). Node must be readable. 
*/ + C4_ALWAYS_INLINE bool has_val() const RYML_NOEXCEPT { _C4RR(); return tree_->has_val(id_); } /**< Forward to @ref Tree::has_val(). Node must be readable. */ + C4_ALWAYS_INLINE bool has_key() const RYML_NOEXCEPT { _C4RR(); return tree_->has_key(id_); } /**< Forward to @ref Tree::has_key(). Node must be readable. */ + C4_ALWAYS_INLINE bool is_val() const RYML_NOEXCEPT { _C4RR(); return tree_->is_val(id_); } /**< Forward to @ref Tree::is_val(). Node must be readable. */ + C4_ALWAYS_INLINE bool is_keyval() const RYML_NOEXCEPT { _C4RR(); return tree_->is_keyval(id_); } /**< Forward to @ref Tree::is_keyval(). Node must be readable. */ + C4_ALWAYS_INLINE bool has_key_tag() const RYML_NOEXCEPT { _C4RR(); return tree_->has_key_tag(id_); } /**< Forward to @ref Tree::has_key_tag(). Node must be readable. */ + C4_ALWAYS_INLINE bool has_val_tag() const RYML_NOEXCEPT { _C4RR(); return tree_->has_val_tag(id_); } /**< Forward to @ref Tree::has_val_tag(). Node must be readable. */ + C4_ALWAYS_INLINE bool has_key_anchor() const RYML_NOEXCEPT { _C4RR(); return tree_->has_key_anchor(id_); } /**< Forward to @ref Tree::has_key_anchor(). Node must be readable. */ + C4_ALWAYS_INLINE bool has_val_anchor() const RYML_NOEXCEPT { _C4RR(); return tree_->has_val_anchor(id_); } /**< Forward to @ref Tree::has_val_anchor(). Node must be readable. */ + C4_ALWAYS_INLINE bool has_anchor() const RYML_NOEXCEPT { _C4RR(); return tree_->has_anchor(id_); } /**< Forward to @ref Tree::has_anchor(). Node must be readable. */ + C4_ALWAYS_INLINE bool is_key_ref() const RYML_NOEXCEPT { _C4RR(); return tree_->is_key_ref(id_); } /**< Forward to @ref Tree::is_key_ref(). Node must be readable. */ + C4_ALWAYS_INLINE bool is_val_ref() const RYML_NOEXCEPT { _C4RR(); return tree_->is_val_ref(id_); } /**< Forward to @ref Tree::is_val_ref(). Node must be readable. */ + C4_ALWAYS_INLINE bool is_ref() const RYML_NOEXCEPT { _C4RR(); return tree_->is_ref(id_); } /**< Forward to @ref Tree::is_ref(). 
Node must be readable. */ + C4_ALWAYS_INLINE bool parent_is_seq() const RYML_NOEXCEPT { _C4RR(); return tree_->parent_is_seq(id_); } /**< Forward to @ref Tree::parent_is_seq(). Node must be readable. */ + C4_ALWAYS_INLINE bool parent_is_map() const RYML_NOEXCEPT { _C4RR(); return tree_->parent_is_map(id_); } /**< Forward to @ref Tree::parent_is_map(). Node must be readable. */ + + RYML_DEPRECATED("use has_key_anchor()") bool is_key_anchor() const noexcept { _C4RR(); return tree_->has_key_anchor(id_); } + RYML_DEPRECATED("use has_val_anchor()") bool is_val_hanchor() const noexcept { _C4RR(); return tree_->has_val_anchor(id_); } + RYML_DEPRECATED("use has_anchor()") bool is_anchor() const noexcept { _C4RR(); return tree_->has_anchor(id_); } + RYML_DEPRECATED("use has_anchor() || is_ref()") bool is_anchor_or_ref() const noexcept { _C4RR(); return tree_->is_anchor_or_ref(id_); } /** @} */ public: - /** @name hierarchy predicates */ + /** @name node container+scalar style predicates */ /** @{ */ - C4_ALWAYS_INLINE C4_PURE bool is_root() const noexcept { _C4RV(); return tree_->is_root(id_); } - C4_ALWAYS_INLINE C4_PURE bool has_parent() const noexcept { _C4RV(); return tree_->has_parent(id_); } + // documentation to the right --> + + C4_ALWAYS_INLINE bool type_has_any(NodeType_e bits) const RYML_NOEXCEPT { _C4RR(); return tree_->type_has_any(id_, bits); } /**< Forward to @ref Tree::type_has_any(). Node must be readable. */ + C4_ALWAYS_INLINE bool type_has_all(NodeType_e bits) const RYML_NOEXCEPT { _C4RR(); return tree_->type_has_all(id_, bits); } /**< Forward to @ref Tree::type_has_all(). Node must be readable. */ + C4_ALWAYS_INLINE bool type_has_none(NodeType_e bits) const RYML_NOEXCEPT { _C4RR(); return tree_->type_has_none(id_, bits); } /**< Forward to @ref Tree::type_has_none(). Node must be readable. 
*/ + + C4_ALWAYS_INLINE bool is_container_styled() const RYML_NOEXCEPT { _C4RR(); return tree_->is_container_styled(id_); } /**< Forward to @ref Tree::is_container_styled(). Node must be readable. */ + C4_ALWAYS_INLINE bool is_block() const RYML_NOEXCEPT { _C4RR(); return tree_->is_block(id_); } /**< Forward to @ref Tree::is_block(). Node must be readable. */ + C4_ALWAYS_INLINE bool is_flow_sl() const RYML_NOEXCEPT { _C4RR(); return tree_->is_flow_sl(id_); } /**< Forward to @ref Tree::is_flow_sl(). Node must be readable. */ + C4_ALWAYS_INLINE bool is_flow_ml() const RYML_NOEXCEPT { _C4RR(); return tree_->is_flow_ml(id_); } /**< Forward to @ref Tree::is_flow_ml(). Node must be readable. */ + C4_ALWAYS_INLINE bool is_flow() const RYML_NOEXCEPT { _C4RR(); return tree_->is_flow(id_); } /**< Forward to @ref Tree::is_flow(). Node must be readable. */ + + C4_ALWAYS_INLINE bool is_key_styled() const RYML_NOEXCEPT { _C4RR(); return tree_->is_key_styled(id_); } /**< Forward to @ref Tree::is_key_styled(). Node must be readable. */ + C4_ALWAYS_INLINE bool is_val_styled() const RYML_NOEXCEPT { _C4RR(); return tree_->is_val_styled(id_); } /**< Forward to @ref Tree::is_val_styled(). Node must be readable. */ + C4_ALWAYS_INLINE bool is_key_literal() const RYML_NOEXCEPT { _C4RR(); return tree_->is_key_literal(id_); } /**< Forward to @ref Tree::is_key_literal(). Node must be readable. */ + C4_ALWAYS_INLINE bool is_val_literal() const RYML_NOEXCEPT { _C4RR(); return tree_->is_val_literal(id_); } /**< Forward to @ref Tree::is_val_literal(). Node must be readable. */ + C4_ALWAYS_INLINE bool is_key_folded() const RYML_NOEXCEPT { _C4RR(); return tree_->is_key_folded(id_); } /**< Forward to @ref Tree::is_key_folded(). Node must be readable. */ + C4_ALWAYS_INLINE bool is_val_folded() const RYML_NOEXCEPT { _C4RR(); return tree_->is_val_folded(id_); } /**< Forward to @ref Tree::is_val_folded(). Node must be readable. 
*/ + C4_ALWAYS_INLINE bool is_key_squo() const RYML_NOEXCEPT { _C4RR(); return tree_->is_key_squo(id_); } /**< Forward to @ref Tree::is_key_squo(). Node must be readable. */ + C4_ALWAYS_INLINE bool is_val_squo() const RYML_NOEXCEPT { _C4RR(); return tree_->is_val_squo(id_); } /**< Forward to @ref Tree::is_val_squo(). Node must be readable. */ + C4_ALWAYS_INLINE bool is_key_dquo() const RYML_NOEXCEPT { _C4RR(); return tree_->is_key_dquo(id_); } /**< Forward to @ref Tree::is_key_dquo(). Node must be readable. */ + C4_ALWAYS_INLINE bool is_val_dquo() const RYML_NOEXCEPT { _C4RR(); return tree_->is_val_dquo(id_); } /**< Forward to @ref Tree::is_val_dquo(). Node must be readable. */ + C4_ALWAYS_INLINE bool is_key_plain() const RYML_NOEXCEPT { _C4RR(); return tree_->is_key_plain(id_); } /**< Forward to @ref Tree::is_key_plain(). Node must be readable. */ + C4_ALWAYS_INLINE bool is_val_plain() const RYML_NOEXCEPT { _C4RR(); return tree_->is_val_plain(id_); } /**< Forward to @ref Tree::is_val_plain(). Node must be readable. */ + C4_ALWAYS_INLINE bool is_key_quoted() const RYML_NOEXCEPT { _C4RR(); return tree_->is_key_quoted(id_); } /**< Forward to @ref Tree::is_key_quoted(). Node must be readable. */ + C4_ALWAYS_INLINE bool is_val_quoted() const RYML_NOEXCEPT { _C4RR(); return tree_->is_val_quoted(id_); } /**< Forward to @ref Tree::is_val_quoted(). Node must be readable. */ + C4_ALWAYS_INLINE bool is_quoted() const RYML_NOEXCEPT { _C4RR(); return tree_->is_quoted(id_); } /**< Forward to @ref Tree::is_quoted(). Node must be readable. 
*/ - C4_ALWAYS_INLINE C4_PURE bool has_child(ConstImpl const& ch) const noexcept { _C4RV(); return tree_->has_child(id_, ch.m_id); } - C4_ALWAYS_INLINE C4_PURE bool has_child(csubstr name) const noexcept { _C4RV(); return tree_->has_child(id_, name); } - C4_ALWAYS_INLINE C4_PURE bool has_children() const noexcept { _C4RV(); return tree_->has_children(id_); } + /** @} */ - C4_ALWAYS_INLINE C4_PURE bool has_sibling(ConstImpl const& n) const noexcept { _C4RV(); return tree_->has_sibling(id_, n.m_id); } - C4_ALWAYS_INLINE C4_PURE bool has_sibling(csubstr name) const noexcept { _C4RV(); return tree_->has_sibling(id_, name); } - /** counts with this */ - C4_ALWAYS_INLINE C4_PURE bool has_siblings() const noexcept { _C4RV(); return tree_->has_siblings(id_); } - /** does not count with this */ - C4_ALWAYS_INLINE C4_PURE bool has_other_siblings() const noexcept { _C4RV(); return tree_->has_other_siblings(id_); } +public: + + /** @name hierarchy predicates */ + /** @{ */ + + // documentation to the right --> + + C4_ALWAYS_INLINE bool is_root() const RYML_NOEXCEPT { _C4RR(); return tree_->is_root(id_); } /**< Forward to @ref Tree::is_root(). Node must be readable. */ + C4_ALWAYS_INLINE bool has_parent() const RYML_NOEXCEPT { _C4RR(); return tree_->has_parent(id_); } /**< Forward to @ref Tree::has_parent() Node must be readable. */ + + C4_ALWAYS_INLINE bool has_child(ConstImpl const& n) const RYML_NOEXCEPT { _C4RR(); return n.readable() ? tree_->has_child(id_, n.m_id) : false; } /**< Forward to @ref Tree::has_child(). Node must be readable. */ + C4_ALWAYS_INLINE bool has_child(id_type node) const RYML_NOEXCEPT { _C4RR(); return tree_->has_child(id_, node); } /**< Forward to @ref Tree::has_child(). Node must be readable. */ + C4_ALWAYS_INLINE bool has_child(csubstr name) const RYML_NOEXCEPT { _C4RR(); return tree_->has_child(id_, name); } /**< Forward to @ref Tree::has_child(). Node must be readable. 
*/ + C4_ALWAYS_INLINE bool has_children() const RYML_NOEXCEPT { _C4RR(); return tree_->has_children(id_); } /**< Forward to @ref Tree::has_children(). Node must be readable. */ + + C4_ALWAYS_INLINE bool has_sibling(ConstImpl const& n) const RYML_NOEXCEPT { _C4RR(); return n.readable() ? tree_->has_sibling(id_, n.m_id) : false; } /**< Forward to @ref Tree::has_sibling(); yields false when @p n is not readable. Node must be readable. */ + C4_ALWAYS_INLINE bool has_sibling(id_type node) const RYML_NOEXCEPT { _C4RR(); return tree_->has_sibling(id_, node); } /**< Forward to @ref Tree::has_sibling(). Node must be readable. */ + C4_ALWAYS_INLINE bool has_sibling(csubstr name) const RYML_NOEXCEPT { _C4RR(); return tree_->has_sibling(id_, name); } /**< Forward to @ref Tree::has_sibling(). Node must be readable. */ + C4_ALWAYS_INLINE bool has_other_siblings() const RYML_NOEXCEPT { _C4RR(); return tree_->has_other_siblings(id_); } /**< Forward to @ref Tree::has_other_siblings(). Node must be readable. */ + + RYML_DEPRECATED("use has_other_siblings()") bool has_siblings() const RYML_NOEXCEPT { _C4RR(); return tree_->has_siblings(id_); } /** @} */ @@ -20581,137 +23014,328 @@ struct RoNodeMethods /** @name hierarchy getters */ /** @{ */ + // documentation to the right --> template - C4_ALWAYS_INLINE C4_PURE auto doc(size_t num) noexcept -> _C4_IF_MUTABLE(Impl) { _C4RV(); return {tree__, tree__->doc(num)}; } - C4_ALWAYS_INLINE C4_PURE ConstImpl doc(size_t num) const noexcept { _C4RV(); return {tree_, tree_->doc(num)}; } - + C4_ALWAYS_INLINE auto doc(id_type i) RYML_NOEXCEPT -> _C4_IF_MUTABLE(Impl) { RYML_ASSERT(tree_); return {tree__, tree__->doc(i)}; } /**< Forward to @ref Tree::doc(). Node must be readable. */ + /** succeeds even when the node may have invalid or seed id */ + C4_ALWAYS_INLINE ConstImpl doc(id_type i) const RYML_NOEXCEPT { RYML_ASSERT(tree_); return {tree_, tree_->doc(i)}; } /**< Forward to @ref Tree::doc(). Node must be readable. 
*/ template - C4_ALWAYS_INLINE C4_PURE auto parent() noexcept -> _C4_IF_MUTABLE(Impl) { _C4RV(); return {tree__, tree__->parent(id__)}; } - C4_ALWAYS_INLINE C4_PURE ConstImpl parent() const noexcept { _C4RV(); return {tree_, tree_->parent(id_)}; } - - - /** O(#num_children) */ - C4_ALWAYS_INLINE C4_PURE size_t child_pos(ConstImpl const& n) const noexcept { _C4RV(); return tree_->child_pos(id_, n.m_id); } - C4_ALWAYS_INLINE C4_PURE size_t num_children() const noexcept { _C4RV(); return tree_->num_children(id_); } + C4_ALWAYS_INLINE auto parent() RYML_NOEXCEPT -> _C4_IF_MUTABLE(Impl) { _C4RR(); return {tree__, tree__->parent(id__)}; } /**< Forward to @ref Tree::parent(). Node must be readable. */ + C4_ALWAYS_INLINE ConstImpl parent() const RYML_NOEXCEPT { _C4RR(); return {tree_, tree_->parent(id_)}; } /**< Forward to @ref Tree::parent(). Node must be readable. */ template - C4_ALWAYS_INLINE C4_PURE auto first_child() noexcept -> _C4_IF_MUTABLE(Impl) { _C4RV(); return {tree__, tree__->first_child(id__)}; } - C4_ALWAYS_INLINE C4_PURE ConstImpl first_child() const noexcept { _C4RV(); return {tree_, tree_->first_child(id_)}; } + C4_ALWAYS_INLINE auto first_child() RYML_NOEXCEPT -> _C4_IF_MUTABLE(Impl) { _C4RR(); return {tree__, tree__->first_child(id__)}; } /**< Forward to @ref Tree::first_child(). Node must be readable. */ + C4_ALWAYS_INLINE ConstImpl first_child() const RYML_NOEXCEPT { _C4RR(); return {tree_, tree_->first_child(id_)}; } /**< Forward to @ref Tree::first_child(). Node must be readable. */ template - C4_ALWAYS_INLINE C4_PURE auto last_child() noexcept -> _C4_IF_MUTABLE(Impl) { _C4RV(); return {tree__, tree__->last_child(id__)}; } - C4_ALWAYS_INLINE C4_PURE ConstImpl last_child () const noexcept { _C4RV(); return {tree_, tree_->last_child (id_)}; } + C4_ALWAYS_INLINE auto last_child() RYML_NOEXCEPT -> _C4_IF_MUTABLE(Impl) { _C4RR(); return {tree__, tree__->last_child(id__)}; } /**< Forward to @ref Tree::last_child(). Node must be readable. 
*/ + C4_ALWAYS_INLINE ConstImpl last_child () const RYML_NOEXCEPT { _C4RR(); return {tree_, tree_->last_child (id_)}; } /**< Forward to @ref Tree::last_child(). Node must be readable. */ template - C4_ALWAYS_INLINE C4_PURE auto child(size_t pos) noexcept -> _C4_IF_MUTABLE(Impl) { _C4RV(); return {tree__, tree__->child(id__, pos)}; } - C4_ALWAYS_INLINE C4_PURE ConstImpl child(size_t pos) const noexcept { _C4RV(); return {tree_, tree_->child(id_, pos)}; } + C4_ALWAYS_INLINE auto child(id_type pos) RYML_NOEXCEPT -> _C4_IF_MUTABLE(Impl) { _C4RR(); return {tree__, tree__->child(id__, pos)}; } /**< Forward to @ref Tree::child(). Node must be readable. */ + C4_ALWAYS_INLINE ConstImpl child(id_type pos) const RYML_NOEXCEPT { _C4RR(); return {tree_, tree_->child(id_, pos)}; } /**< Forward to @ref Tree::child(). Node must be readable. */ template - C4_ALWAYS_INLINE C4_PURE auto find_child(csubstr name) noexcept -> _C4_IF_MUTABLE(Impl) { _C4RV(); return {tree__, tree__->find_child(id__, name)}; } - C4_ALWAYS_INLINE C4_PURE ConstImpl find_child(csubstr name) const noexcept { _C4RV(); return {tree_, tree_->find_child(id_, name)}; } + C4_ALWAYS_INLINE auto find_child(csubstr name) RYML_NOEXCEPT -> _C4_IF_MUTABLE(Impl) { _C4RR(); return {tree__, tree__->find_child(id__, name)}; } /**< Forward to @ref Tree::find_child(). Node must be readable. */ + C4_ALWAYS_INLINE ConstImpl find_child(csubstr name) const RYML_NOEXCEPT { _C4RR(); return {tree_, tree_->find_child(id_, name)}; } /**< Forward to @ref Tree::find_child(). Node must be readable. */ + template + C4_ALWAYS_INLINE auto prev_sibling() RYML_NOEXCEPT -> _C4_IF_MUTABLE(Impl) { _C4RR(); return {tree__, tree__->prev_sibling(id__)}; } /**< Forward to @ref Tree::prev_sibling(). Node must be readable. */ + C4_ALWAYS_INLINE ConstImpl prev_sibling() const RYML_NOEXCEPT { _C4RR(); return {tree_, tree_->prev_sibling(id_)}; } /**< Forward to @ref Tree::prev_sibling(). Node must be readable. 
*/ - /** O(#num_siblings) */ - C4_ALWAYS_INLINE C4_PURE size_t num_siblings() const noexcept { _C4RV(); return tree_->num_siblings(id_); } - C4_ALWAYS_INLINE C4_PURE size_t num_other_siblings() const noexcept { _C4RV(); return tree_->num_other_siblings(id_); } - C4_ALWAYS_INLINE C4_PURE size_t sibling_pos(ConstImpl const& n) const noexcept { _C4RV(); return tree_->child_pos(tree_->parent(id_), n.m_id); } + template + C4_ALWAYS_INLINE auto next_sibling() RYML_NOEXCEPT -> _C4_IF_MUTABLE(Impl) { _C4RR(); return {tree__, tree__->next_sibling(id__)}; } /**< Forward to @ref Tree::next_sibling(). Node must be readable. */ + C4_ALWAYS_INLINE ConstImpl next_sibling() const RYML_NOEXCEPT { _C4RR(); return {tree_, tree_->next_sibling(id_)}; } /**< Forward to @ref Tree::next_sibling(). Node must be readable. */ template - C4_ALWAYS_INLINE C4_PURE auto prev_sibling() noexcept -> _C4_IF_MUTABLE(Impl) { _C4RV(); return {tree__, tree__->prev_sibling(id__)}; } - C4_ALWAYS_INLINE C4_PURE ConstImpl prev_sibling() const noexcept { _C4RV(); return {tree_, tree_->prev_sibling(id_)}; } + C4_ALWAYS_INLINE auto first_sibling() RYML_NOEXCEPT -> _C4_IF_MUTABLE(Impl) { _C4RR(); return {tree__, tree__->first_sibling(id__)}; } /**< Forward to @ref Tree::first_sibling(). Node must be readable. */ + C4_ALWAYS_INLINE ConstImpl first_sibling() const RYML_NOEXCEPT { _C4RR(); return {tree_, tree_->first_sibling(id_)}; } /**< Forward to @ref Tree::first_sibling(). Node must be readable. */ template - C4_ALWAYS_INLINE C4_PURE auto next_sibling() noexcept -> _C4_IF_MUTABLE(Impl) { _C4RV(); return {tree__, tree__->next_sibling(id__)}; } - C4_ALWAYS_INLINE C4_PURE ConstImpl next_sibling() const noexcept { _C4RV(); return {tree_, tree_->next_sibling(id_)}; } + C4_ALWAYS_INLINE auto last_sibling() RYML_NOEXCEPT -> _C4_IF_MUTABLE(Impl) { _C4RR(); return {tree__, tree__->last_sibling(id__)}; } /**< Forward to @ref Tree::last_sibling(). Node must be readable. 
*/ + C4_ALWAYS_INLINE ConstImpl last_sibling () const RYML_NOEXCEPT { _C4RR(); return {tree_, tree_->last_sibling(id_)}; } /**< Forward to @ref Tree::last_sibling(). Node must be readable. */ template - C4_ALWAYS_INLINE C4_PURE auto first_sibling() noexcept -> _C4_IF_MUTABLE(Impl) { _C4RV(); return {tree__, tree__->first_sibling(id__)}; } - C4_ALWAYS_INLINE C4_PURE ConstImpl first_sibling() const noexcept { _C4RV(); return {tree_, tree_->first_sibling(id_)}; } + C4_ALWAYS_INLINE auto sibling(id_type pos) RYML_NOEXCEPT -> _C4_IF_MUTABLE(Impl) { _C4RR(); return {tree__, tree__->sibling(id__, pos)}; } /**< Forward to @ref Tree::sibling(). Node must be readable. */ + C4_ALWAYS_INLINE ConstImpl sibling(id_type pos) const RYML_NOEXCEPT { _C4RR(); return {tree_, tree_->sibling(id_, pos)}; } /**< Forward to @ref Tree::sibling(). Node must be readable. */ template - C4_ALWAYS_INLINE C4_PURE auto last_sibling() noexcept -> _C4_IF_MUTABLE(Impl) { _C4RV(); return {tree__, tree__->last_sibling(id__)}; } - C4_ALWAYS_INLINE C4_PURE ConstImpl last_sibling () const noexcept { _C4RV(); return {tree_, tree_->last_sibling(id_)}; } + C4_ALWAYS_INLINE auto find_sibling(csubstr name) RYML_NOEXCEPT -> _C4_IF_MUTABLE(Impl) { _C4RR(); return {tree__, tree__->find_sibling(id__, name)}; } /**< Forward to @ref Tree::find_sibling(). Node must be readable. */ + C4_ALWAYS_INLINE ConstImpl find_sibling(csubstr name) const RYML_NOEXCEPT { _C4RR(); return {tree_, tree_->find_sibling(id_, name)}; } /**< Forward to @ref Tree::find_sibling(). Node must be readable. */ + + C4_ALWAYS_INLINE id_type num_children() const RYML_NOEXCEPT { _C4RR(); return tree_->num_children(id_); } /**< O(num_children). Forward to @ref Tree::num_children(). */ + C4_ALWAYS_INLINE id_type num_siblings() const RYML_NOEXCEPT { _C4RR(); return tree_->num_siblings(id_); } /**< O(num_children). Forward to @ref Tree::num_siblings(). 
*/ + C4_ALWAYS_INLINE id_type num_other_siblings() const RYML_NOEXCEPT { _C4RR(); return tree_->num_other_siblings(id_); } /**< O(num_siblings). Forward to @ref Tree::num_other_siblings(). */ + C4_ALWAYS_INLINE id_type child_pos(ConstImpl const& n) const RYML_NOEXCEPT { _C4RR(); _RYML_CB_ASSERT(tree_->m_callbacks, n.readable()); return tree_->child_pos(id_, n.m_id); } /**< O(num_children). Forward to @ref Tree::child_pos(). Asserts that @p n is readable. */ + C4_ALWAYS_INLINE id_type sibling_pos(ConstImpl const& n) const RYML_NOEXCEPT { _C4RR(); _RYML_CB_ASSERT(tree_->callbacks(), n.readable()); return tree_->child_pos(tree_->parent(id_), n.m_id); } /**< O(num_siblings). Forward to @ref Tree::child_pos(), called on this node's parent. Asserts that @p n is readable. */ + + C4_ALWAYS_INLINE id_type depth_asc() const RYML_NOEXCEPT { _C4RR(); return tree_->depth_asc(id_); } /**< O(log(num_nodes)). Forward to @ref Tree::depth_asc(). Node must be readable. */ + C4_ALWAYS_INLINE id_type depth_desc() const RYML_NOEXCEPT { _C4RR(); return tree_->depth_desc(id_); } /**< O(num_nodes). Forward to @ref Tree::depth_desc(). Node must be readable. */ + + /** @} */ + +public: + /** @name square_brackets + * operator[] */ + /** @{ */ + + /** Find child by key; complexity is O(num_children). + * + * Returns the requested node, or an object in seed state if no + * such child is found (see @ref NodeRef for an explanation of + * what is seed state). When the object is in seed state, using it + * to read from the tree is UB. The seed node can be used to write + * to the tree provided that its create() method is called prior + * to writing, which happens in most modifying methods in + * NodeRef. It is the caller's responsibility to verify that the + * returned node is readable before subsequently using it to read + * from the tree. + * + * @warning the calling object must be readable. This precondition + * is asserted. The assertion is performed only if @ref + * RYML_USE_ASSERT is set to true. As with the non-const overload, + * it is UB to call this method if the node is not readable. 
+ * + * @see https://github.com/biojppm/rapidyaml/issues/389 */ template - C4_ALWAYS_INLINE C4_PURE auto sibling(size_t pos) noexcept -> _C4_IF_MUTABLE(Impl) { _C4RV(); return {tree__, tree__->sibling(id__, pos)}; } - C4_ALWAYS_INLINE C4_PURE ConstImpl sibling(size_t pos) const noexcept { _C4RV(); return {tree_, tree_->sibling(id_, pos)}; } + C4_ALWAYS_INLINE auto operator[] (csubstr key) RYML_NOEXCEPT -> _C4_IF_MUTABLE(Impl) + { + _C4RR(); + id_type ch = tree__->find_child(id__, key); + return ch != NONE ? Impl(tree__, ch) : Impl(tree__, id__, key); + } + /** Find child by position; complexity is O(pos). + * + * Returns the requested node, or an object in seed state if no + * such child is found (see @ref NodeRef for an explanation of + * what is seed state). When the object is in seed state, using it + * to read from the tree is UB. The seed node can be used to write + * to the tree provided that its create() method is called prior + * to writing, which happens in most modifying methods in + * NodeRef. It is the caller's responsibility to verify that the + * returned node is readable before subsequently using it to read + * from the tree. + * + * @warning the calling object must be readable. This precondition + * is asserted. The assertion is performed only if @ref + * RYML_USE_ASSERT is set to true. As with the non-const overload, + * it is UB to call this method if the node is not readable. + * + * @see https://github.com/biojppm/rapidyaml/issues/389 */ template - C4_ALWAYS_INLINE C4_PURE auto find_sibling(csubstr name) noexcept -> _C4_IF_MUTABLE(Impl) { _C4RV(); return {tree__, tree__->find_sibling(id__, name)}; } - C4_ALWAYS_INLINE C4_PURE ConstImpl find_sibling(csubstr name) const noexcept { _C4RV(); return {tree_, tree_->find_sibling(id_, name)}; } + C4_ALWAYS_INLINE auto operator[] (id_type pos) RYML_NOEXCEPT -> _C4_IF_MUTABLE(Impl) + { + _C4RR(); + id_type ch = tree__->child(id__, pos); + return ch != NONE ? 
Impl(tree__, ch) : Impl(tree__, id__, pos); + } + /** Find a child by key; complexity is O(num_children). + * + * Behaves similar to the non-const overload, but further asserts + * that the returned node is readable (because it can never be in + * a seed state). The assertion is performed only if @ref + * RYML_USE_ASSERT is set to true. As with the non-const overload, + * it is UB to use the return value if it is not valid. + * + * @see https://github.com/biojppm/rapidyaml/issues/389 */ + C4_ALWAYS_INLINE ConstImpl operator[] (csubstr key) const RYML_NOEXCEPT + { + _C4RR(); + id_type ch = tree_->find_child(id_, key); + _RYML_CB_ASSERT(tree_->m_callbacks, ch != NONE); + return {tree_, ch}; + } - /** O(num_children) */ - C4_ALWAYS_INLINE C4_PURE ConstImpl operator[] (csubstr k) const noexcept + /** Find a child by position; complexity is O(pos). + * + * Behaves similar to the non-const overload, but further asserts + * that the returned node is readable (because it can never be in + * a seed state). This assertion is performed only if @ref + * RYML_USE_ASSERT is set to true. As with the non-const overload, + * it is UB to use the return value if it is not valid. + * + * @see https://github.com/biojppm/rapidyaml/issues/389 */ + C4_ALWAYS_INLINE ConstImpl operator[] (id_type pos) const RYML_NOEXCEPT { - _C4RV(); - size_t ch = tree_->find_child(id_, k); + _C4RR(); + id_type ch = tree_->child(id_, pos); _RYML_CB_ASSERT(tree_->m_callbacks, ch != NONE); return {tree_, ch}; } - /** Find child by key. O(num_children). returns a seed node if no such child is found. */ + + /** @} */ + +public: + + /** @name at + * + * These functions are the analogue to operator[], with the + * difference that they emit an error instead of an + * assertion. That is, if any of the pre or post conditions is + * violated, an error is always emitted (resulting in a call to + * the error callback). + * + * @{ */ + + /** Find child by key; complexity is O(num_children). 
+ * + * Returns the requested node, or an object in seed state if no + * such child is found (see @ref NodeRef for an explanation of + * what is seed state). When the object is in seed state, using it + * to read from the tree is UB. The seed node can be subsequently + * used to write to the tree provided that its create() method is + * called prior to writing, which happens inside most mutating + * methods in NodeRef. It is the caller's responsibility to verify + * that the returned node is readable before subsequently using it + * to read from the tree. + * + * @warning This method will call the error callback (regardless + * of build type or of the value of RYML_USE_ASSERT) whenever any + * of the following preconditions is violated: a) the object is + * valid (points at a tree and a node), b) the calling object must + * be readable (must not be in seed state), c) the calling object + * must be pointing at a MAP node. The preconditions are similar + * to the non-const operator[](csubstr), but instead of using + * assertions, this function directly checks those conditions and + * calls the error callback if any of the checks fail. + * + * @note since it is valid behavior for the returned node to be in + * seed state, the error callback is not invoked when this + * happens. */ template - C4_ALWAYS_INLINE C4_PURE auto operator[] (csubstr k) noexcept -> _C4_IF_MUTABLE(Impl) + C4_ALWAYS_INLINE auto at(csubstr key) -> _C4_IF_MUTABLE(Impl) { - _C4RV(); - size_t ch = tree__->find_child(id__, k); - return ch != NONE ? Impl(tree__, ch) : NodeRef(tree__, id__, k); + RYML_CHECK(tree_ != nullptr); + _RYML_CB_CHECK(tree_->m_callbacks, (id_ >= 0 && id_ < tree_->capacity())); + _RYML_CB_CHECK(tree_->m_callbacks, ((Impl const*)this)->readable()); + _RYML_CB_CHECK(tree_->m_callbacks, tree_->is_map(id_)); + id_type ch = tree__->find_child(id__, key); + return ch != NONE ? 
Impl(tree__, ch) : Impl(tree__, id__, key); } - /** O(num_children) */ - C4_ALWAYS_INLINE C4_PURE ConstImpl operator[] (size_t pos) const noexcept + /** Find child by position; complexity is O(pos). + * + * Returns the requested node, or an object in seed state if no + * such child is found (see @ref NodeRef for an explanation of + * what is seed state). When the object is in seed state, using it + * to read from the tree is UB. The seed node can be used to write + * to the tree provided that its create() method is called prior + * to writing, which happens in most modifying methods in + * NodeRef. It is the caller's responsibility to verify that the + * returned node is readable before subsequently using it to read + * from the tree. + * + * @warning This method will call the error callback (regardless + * of build type or of the value of RYML_USE_ASSERT) whenever any + * of the following preconditions is violated: a) the object is + * valid (points at a tree and a node), b) the calling object must + * be readable (must not be in seed state), c) the calling object + * must be pointing at a container node. The preconditions are similar + * to the non-const operator[](id_type), but instead of using + * assertions, this function directly checks those conditions and + * calls the error callback if any of the checks fail. + * + * @note since it is valid behavior for the returned node to be in + * seed state, the error callback is not invoked when this + * happens. 
*/ + template + C4_ALWAYS_INLINE auto at(id_type pos) -> _C4_IF_MUTABLE(Impl) { - _C4RV(); - size_t ch = tree_->child(id_, pos); - _RYML_CB_ASSERT(tree_->m_callbacks, ch != NONE); + RYML_CHECK(tree_ != nullptr); + const id_type cap = tree_->capacity(); + _RYML_CB_CHECK(tree_->m_callbacks, (id_ >= 0 && id_ < cap)); + _RYML_CB_CHECK(tree_->m_callbacks, (pos >= 0 && pos < cap)); + _RYML_CB_CHECK(tree_->m_callbacks, ((Impl const*)this)->readable()); + _RYML_CB_CHECK(tree_->m_callbacks, tree_->is_container(id_)); + id_type ch = tree__->child(id__, pos); + return ch != NONE ? Impl(tree__, ch) : Impl(tree__, id__, pos); + } + + /** Get a child by name, with error checking; complexity is + * O(num_children). + * + * Behaves as operator[](csubstr) const, but always raises an + * error (even when RYML_USE_ASSERT is set to false) when the + * returned node does not exist, or when this node is not + * readable, or when it is not a map. This behaviour is similar to + * std::vector::at(), but the error consists in calling the error + * callback instead of directly raising an exception. */ + ConstImpl at(csubstr key) const + { + RYML_CHECK(tree_ != nullptr); + _RYML_CB_CHECK(tree_->m_callbacks, (id_ >= 0 && id_ < tree_->capacity())); + _RYML_CB_CHECK(tree_->m_callbacks, ((Impl const*)this)->readable()); + _RYML_CB_CHECK(tree_->m_callbacks, tree_->is_map(id_)); + id_type ch = tree_->find_child(id_, key); + _RYML_CB_CHECK(tree_->m_callbacks, ch != NONE); return {tree_, ch}; } - /** Find child by position. O(pos). returns a seed node if no such child is found. */ - template - C4_ALWAYS_INLINE C4_PURE auto operator[] (size_t pos) noexcept -> _C4_IF_MUTABLE(Impl) - { - _C4RV(); - size_t ch = tree__->child(id__, pos); - return ch != NONE ? Impl(tree__, ch) : NodeRef(tree__, id__, pos); + /** Get a child by position, with error checking; complexity is + * O(pos). 
+ * + * Behaves as operator[](id_type) const, but always raises an error + * (even when RYML_USE_ASSERT is set to false) when the returned + * node does not exist, or when this node is not readable, or when + * it is not a container. This behaviour is similar to + * std::vector::at(), but the error consists in calling the error + * callback instead of directly raising an exception. */ + ConstImpl at(id_type pos) const + { + RYML_CHECK(tree_ != nullptr); + const id_type cap = tree_->capacity(); + _RYML_CB_CHECK(tree_->m_callbacks, (id_ >= 0 && id_ < cap)); + _RYML_CB_CHECK(tree_->m_callbacks, (pos >= 0 && pos < cap)); + _RYML_CB_CHECK(tree_->m_callbacks, ((Impl const*)this)->readable()); + _RYML_CB_CHECK(tree_->m_callbacks, tree_->is_container(id_)); + const id_type ch = tree_->child(id_, pos); + _RYML_CB_CHECK(tree_->m_callbacks, ch != NONE); + return {tree_, ch}; } /** @} */ public: - /** deserialization */ + /** @name deserialization */ /** @{ */ + /** deserialize the node's val to the given variable, forwarding + * to the user-overrideable @ref read() function. */ template ConstImpl const& operator>> (T &v) const { - _C4RV(); + _C4RR(); if( ! read((ConstImpl const&)*this, &v)) _RYML_CB_ERR(tree_->m_callbacks, "could not deserialize value"); return *((ConstImpl const*)this); } - /** deserialize the node's key to the given variable */ + /** deserialize the node's key to the given variable, forwarding + * to the user-overrideable @ref read() function; use @ref key() + * to disambiguate; for example: `node >> ryml::key(var)` */ template ConstImpl const& operator>> (Key v) const { - _C4RV(); - if( ! from_chars(key(), &v.k)) + _C4RR(); + if(key().empty() || ! from_chars(key(), &v.k)) _RYML_CB_ERR(tree_->m_callbacks, "could not deserialize key"); return *((ConstImpl const*)this); } - /** deserialize the node's key as base64 */ + /** deserialize the node's key as base64. 
lightweight wrapper over @ref deserialize_key() */ ConstImpl const& operator>> (Key w) const { deserialize_key(w.wrapper); return *((ConstImpl const*)this); } - /** deserialize the node's val as base64 */ + /** deserialize the node's val as base64. lightweight wrapper over @ref deserialize_val() */ ConstImpl const& operator>> (fmt::base64_wrapper w) const { deserialize_val(w); @@ -20723,7 +23347,7 @@ struct RoNodeMethods * @return the size of base64-decoded blob */ size_t deserialize_key(fmt::base64_wrapper v) const { - _C4RV(); + _C4RR(); return from_chars(key(), &v); } /** decode the base64-encoded key and assign the @@ -20731,25 +23355,32 @@ struct RoNodeMethods * @return the size of base64-decoded blob */ size_t deserialize_val(fmt::base64_wrapper v) const { - _C4RV(); + _C4RR(); return from_chars(val(), &v); }; + /** look for a child by name, if it exists assign to var. return + * true if the child existed. */ template bool get_if(csubstr name, T *var) const { - auto ch = find_child(name); - if(!ch.valid()) + _C4RR(); + ConstImpl ch = find_child(name); + if(!ch.readable()) return false; ch >> *var; return true; } + /** look for a child by name, if it exists assign to var, + * otherwise default to fallback. return true if the child + * existed. 
*/ template bool get_if(csubstr name, T *var, T const& fallback) const { - auto ch = find_child(name); - if(ch.valid()) + _C4RR(); + ConstImpl ch = find_child(name); + if(ch.readable()) { ch >> *var; return true; @@ -20783,29 +23414,35 @@ struct RoNodeMethods using children_view = detail::children_view_; using const_children_view = detail::children_view_; + /** get an iterator to the first child */ template - C4_ALWAYS_INLINE C4_PURE auto begin() noexcept -> _C4_IF_MUTABLE(iterator) { _C4RV(); return iterator(tree__, tree__->first_child(id__)); } - C4_ALWAYS_INLINE C4_PURE const_iterator begin() const noexcept { _C4RV(); return const_iterator(tree_, tree_->first_child(id_)); } - C4_ALWAYS_INLINE C4_PURE const_iterator cbegin() const noexcept { _C4RV(); return const_iterator(tree_, tree_->first_child(id_)); } + C4_ALWAYS_INLINE auto begin() RYML_NOEXCEPT -> _C4_IF_MUTABLE(iterator) { _C4RR(); return iterator(tree__, tree__->first_child(id__)); } + /** get an iterator to the first child */ + C4_ALWAYS_INLINE const_iterator begin() const RYML_NOEXCEPT { _C4RR(); return const_iterator(tree_, tree_->first_child(id_)); } + /** get an iterator to the first child */ + C4_ALWAYS_INLINE const_iterator cbegin() const RYML_NOEXCEPT { _C4RR(); return const_iterator(tree_, tree_->first_child(id_)); } + /** get an iterator to after the last child */ template - C4_ALWAYS_INLINE C4_PURE auto end() noexcept -> _C4_IF_MUTABLE(iterator) { _C4RV(); return iterator(tree__, NONE); } - C4_ALWAYS_INLINE C4_PURE const_iterator end() const noexcept { _C4RV(); return const_iterator(tree_, NONE); } - C4_ALWAYS_INLINE C4_PURE const_iterator cend() const noexcept { _C4RV(); return const_iterator(tree_, tree_->first_child(id_)); } + C4_ALWAYS_INLINE auto end() RYML_NOEXCEPT -> _C4_IF_MUTABLE(iterator) { _C4RR(); return iterator(tree__, NONE); } + /** get an iterator to after the last child */ + C4_ALWAYS_INLINE const_iterator end() const RYML_NOEXCEPT { _C4RR(); return const_iterator(tree_, 
NONE); } + /** get an iterator to after the last child */ + C4_ALWAYS_INLINE const_iterator cend() const RYML_NOEXCEPT { _C4RR(); return const_iterator(tree_, tree_->first_child(id_)); } /** get an iterable view over children */ template - C4_ALWAYS_INLINE C4_PURE auto children() noexcept -> _C4_IF_MUTABLE(children_view) { _C4RV(); return children_view(begin(), end()); } + C4_ALWAYS_INLINE auto children() RYML_NOEXCEPT -> _C4_IF_MUTABLE(children_view) { _C4RR(); return children_view(begin(), end()); } /** get an iterable view over children */ - C4_ALWAYS_INLINE C4_PURE const_children_view children() const noexcept { _C4RV(); return const_children_view(begin(), end()); } + C4_ALWAYS_INLINE const_children_view children() const RYML_NOEXCEPT { _C4RR(); return const_children_view(begin(), end()); } /** get an iterable view over children */ - C4_ALWAYS_INLINE C4_PURE const_children_view cchildren() const noexcept { _C4RV(); return const_children_view(begin(), end()); } + C4_ALWAYS_INLINE const_children_view cchildren() const RYML_NOEXCEPT { _C4RR(); return const_children_view(begin(), end()); } /** get an iterable view over all siblings (including the calling node) */ template - C4_ALWAYS_INLINE C4_PURE auto siblings() noexcept -> _C4_IF_MUTABLE(children_view) + C4_ALWAYS_INLINE auto siblings() RYML_NOEXCEPT -> _C4_IF_MUTABLE(children_view) { - _C4RV(); + _C4RR(); NodeData const *nd = tree__->get(id__); return (nd->m_parent != NONE) ? // does it have a parent? children_view(iterator(tree__, tree_->get(nd->m_parent)->m_first_child), iterator(tree__, NONE)) @@ -20813,9 +23450,9 @@ struct RoNodeMethods children_view(end(), end()); } /** get an iterable view over all siblings (including the calling node) */ - C4_ALWAYS_INLINE C4_PURE const_children_view siblings() const noexcept + C4_ALWAYS_INLINE const_children_view siblings() const RYML_NOEXCEPT { - _C4RV(); + _C4RR(); NodeData const *nd = tree_->get(id_); return (nd->m_parent != NONE) ? // does it have a parent? 
const_children_view(const_iterator(tree_, tree_->get(nd->m_parent)->m_first_child), const_iterator(tree_, NONE)) @@ -20823,33 +23460,37 @@ struct RoNodeMethods const_children_view(end(), end()); } /** get an iterable view over all siblings (including the calling node) */ - C4_ALWAYS_INLINE C4_PURE const_children_view csiblings() const noexcept { return siblings(); } + C4_ALWAYS_INLINE const_children_view csiblings() const RYML_NOEXCEPT { return siblings(); } /** visit every child node calling fn(node) */ template - C4_ALWAYS_INLINE C4_PURE bool visit(Visitor fn, size_t indentation_level=0, bool skip_root=true) const noexcept + bool visit(Visitor fn, id_type indentation_level=0, bool skip_root=true) const RYML_NOEXCEPT { - return detail::_visit(*(ConstImpl*)this, fn, indentation_level, skip_root); + _C4RR(); + return detail::_visit(*(ConstImpl const*)this, fn, indentation_level, skip_root); } /** visit every child node calling fn(node) */ template - auto visit(Visitor fn, size_t indentation_level=0, bool skip_root=true) noexcept + auto visit(Visitor fn, id_type indentation_level=0, bool skip_root=true) RYML_NOEXCEPT -> _C4_IF_MUTABLE(bool) { + _C4RR(); return detail::_visit(*(Impl*)this, fn, indentation_level, skip_root); } /** visit every child node calling fn(node, level) */ template - C4_ALWAYS_INLINE C4_PURE bool visit_stacked(Visitor fn, size_t indentation_level=0, bool skip_root=true) const noexcept + bool visit_stacked(Visitor fn, id_type indentation_level=0, bool skip_root=true) const RYML_NOEXCEPT { - return detail::_visit_stacked(*(ConstImpl*)this, fn, indentation_level, skip_root); + _C4RR(); + return detail::_visit_stacked(*(ConstImpl const*)this, fn, indentation_level, skip_root); } /** visit every child node calling fn(node, level) */ template - auto visit_stacked(Visitor fn, size_t indentation_level=0, bool skip_root=true) noexcept + auto visit_stacked(Visitor fn, id_type indentation_level=0, bool skip_root=true) RYML_NOEXCEPT -> _C4_IF_MUTABLE(bool) 
{ + _C4RR(); return detail::_visit_stacked(*(Impl*)this, fn, indentation_level, skip_root); } @@ -20862,7 +23503,7 @@ struct RoNodeMethods #endif #undef _C4_IF_MUTABLE - #undef _C4RV + #undef _C4RR #undef tree_ #undef tree__ #undef id_ @@ -20870,13 +23511,17 @@ struct RoNodeMethods C4_SUPPRESS_WARNING_GCC_CLANG_POP }; - -} // namespace detail +} // detail //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- +/** Holds a pointer to an existing tree, and a node id. It can be used + * only to read from the tree. + * + * @warning The lifetime of the tree must be larger than that of this + * object. It is up to the user to ensure that this happens. */ class RYML_EXPORT ConstNodeRef : public detail::RoNodeMethods { public: @@ -20886,7 +23531,7 @@ class RYML_EXPORT ConstNodeRef : public detail::RoNodeMethods; @@ -20896,17 +23541,17 @@ class RYML_EXPORT ConstNodeRef : public detail::RoNodeMethodsroot_id()) {} - ConstNodeRef(Tree const *t, size_t id) : m_tree(t), m_id(id) {} - ConstNodeRef(std::nullptr_t) : m_tree(nullptr), m_id(NONE) {} + ConstNodeRef() noexcept : m_tree(nullptr), m_id(NONE) {} + ConstNodeRef(Tree const &t) noexcept : m_tree(&t), m_id(t .root_id()) {} + ConstNodeRef(Tree const *t) noexcept : m_tree(t ), m_id(t->root_id()) {} + ConstNodeRef(Tree const *t, id_type id) noexcept : m_tree(t), m_id(id) {} + ConstNodeRef(std::nullptr_t) noexcept : m_tree(nullptr), m_id(NONE) {} - ConstNodeRef(ConstNodeRef const&) = default; - ConstNodeRef(ConstNodeRef &&) = default; + ConstNodeRef(ConstNodeRef const&) noexcept = default; + ConstNodeRef(ConstNodeRef &&) noexcept = default; - ConstNodeRef(NodeRef const&); - ConstNodeRef(NodeRef &&); + ConstNodeRef(NodeRef const&) noexcept; + ConstNodeRef(NodeRef &&) noexcept; /** @} */ @@ -20915,23 +23560,33 @@ class RYML_EXPORT ConstNodeRef : 
public detail::RoNodeMethodsoperator==(that); } + C4_ALWAYS_INLINE bool operator== (ConstNodeRef const& that) const RYML_NOEXCEPT { return that.m_tree == m_tree && m_id == that.m_id; } + C4_ALWAYS_INLINE bool operator!= (ConstNodeRef const& that) const RYML_NOEXCEPT { return ! this->operator== (that); } - C4_ALWAYS_INLINE C4_PURE bool operator== (std::nullptr_t) const noexcept { return m_tree == nullptr || m_id == NONE; } - C4_ALWAYS_INLINE C4_PURE bool operator!= (std::nullptr_t) const noexcept { return ! this->operator== (nullptr); } + /** @cond dev */ + RYML_DEPRECATED("use invalid()") bool operator== (std::nullptr_t) const noexcept { return m_tree == nullptr || m_id == NONE; } + RYML_DEPRECATED("use !invalid()") bool operator!= (std::nullptr_t) const noexcept { return !(m_tree == nullptr || m_id == NONE); } - C4_ALWAYS_INLINE C4_PURE bool operator== (csubstr val) const noexcept { RYML_ASSERT(has_val()); return m_tree->val(m_id) == val; } - C4_ALWAYS_INLINE C4_PURE bool operator!= (csubstr val) const noexcept { RYML_ASSERT(has_val()); return m_tree->val(m_id) != val; } + RYML_DEPRECATED("use (this->val() == s)") bool operator== (csubstr s) const RYML_NOEXCEPT { RYML_ASSERT(m_tree); _RYML_CB_ASSERT(m_tree->m_callbacks, m_id != NONE); return m_tree->val(m_id) == s; } + RYML_DEPRECATED("use (this->val() != s)") bool operator!= (csubstr s) const RYML_NOEXCEPT { RYML_ASSERT(m_tree); _RYML_CB_ASSERT(m_tree->m_callbacks, m_id != NONE); return m_tree->val(m_id) != s; } + /** @endcond */ /** @} */ @@ -20968,8 +23625,42 @@ class RYML_EXPORT ConstNodeRef : public detail::RoNodeMethods { public: @@ -20980,7 +23671,7 @@ class RYML_EXPORT NodeRef : public detail::RoNodeMethods private: Tree *C4_RESTRICT m_tree; - size_t m_id; + id_type m_id; /** This member is used to enable lazy operator[] writing. 
When a child * with a key or index is not found, m_id is set to the id of the parent @@ -20998,22 +23689,28 @@ class RYML_EXPORT NodeRef : public detail::RoNodeMethods friend struct detail::RoNodeMethods; // require valid: a helper macro, undefined at the end - #define _C4RV() \ + #define _C4RR() \ RYML_ASSERT(m_tree != nullptr); \ _RYML_CB_ASSERT(m_tree->m_callbacks, m_id != NONE && !is_seed()) + // require id: a helper macro, undefined at the end + #define _C4RID() \ + RYML_ASSERT(m_tree != nullptr); \ + _RYML_CB_ASSERT(m_tree->m_callbacks, m_id != NONE) public: /** @name construction */ /** @{ */ - NodeRef() : m_tree(nullptr), m_id(NONE), m_seed() { _clear_seed(); } - NodeRef(Tree &t) : m_tree(&t), m_id(t .root_id()), m_seed() { _clear_seed(); } - NodeRef(Tree *t) : m_tree(t ), m_id(t->root_id()), m_seed() { _clear_seed(); } - NodeRef(Tree *t, size_t id) : m_tree(t), m_id(id), m_seed() { _clear_seed(); } - NodeRef(Tree *t, size_t id, size_t seed_pos) : m_tree(t), m_id(id), m_seed() { m_seed.str = nullptr; m_seed.len = seed_pos; } - NodeRef(Tree *t, size_t id, csubstr seed_key) : m_tree(t), m_id(id), m_seed(seed_key) {} - NodeRef(std::nullptr_t) : m_tree(nullptr), m_id(NONE), m_seed() {} + NodeRef() noexcept : m_tree(nullptr), m_id(NONE), m_seed() { _clear_seed(); } + NodeRef(Tree &t) noexcept : m_tree(&t), m_id(t .root_id()), m_seed() { _clear_seed(); } + NodeRef(Tree *t) noexcept : m_tree(t ), m_id(t->root_id()), m_seed() { _clear_seed(); } + NodeRef(Tree *t, id_type id) noexcept : m_tree(t), m_id(id), m_seed() { _clear_seed(); } + NodeRef(Tree *t, id_type id, id_type seed_pos) noexcept : m_tree(t), m_id(id), m_seed() { m_seed.str = nullptr; m_seed.len = (size_t)seed_pos; } + NodeRef(Tree *t, id_type id, csubstr seed_key) noexcept : m_tree(t), m_id(id), m_seed(seed_key) {} + NodeRef(std::nullptr_t) noexcept : m_tree(nullptr), m_id(NONE), m_seed() {} + + inline void _clear_seed() noexcept { /*do the following manually or an assert is triggered: */ m_seed.str = 
nullptr; m_seed.len = npos; } /** @} */ @@ -21022,23 +23719,27 @@ class RYML_EXPORT NodeRef : public detail::RoNodeMethods /** @name assignment */ /** @{ */ - NodeRef(NodeRef const&) = default; - NodeRef(NodeRef &&) = default; + NodeRef(NodeRef const&) noexcept = default; + NodeRef(NodeRef &&) noexcept = default; - NodeRef& operator= (NodeRef const&) = default; - NodeRef& operator= (NodeRef &&) = default; + NodeRef& operator= (NodeRef const&) noexcept = default; + NodeRef& operator= (NodeRef &&) noexcept = default; /** @} */ public: - /** @name state queries */ - /** @{ */ + /** @name state_queries + * @{ */ - inline bool valid() const { return m_tree != nullptr && m_id != NONE; } - inline bool is_seed() const { return m_seed.str != nullptr || m_seed.len != NONE; } + /** true if the object is not referring to any existing or seed node. @see the doc for @ref NodeRef */ + inline bool invalid() const noexcept { return m_tree == nullptr || m_id == NONE; } + /** true if the object is not invalid and in seed state. @see the doc for @ref NodeRef */ + inline bool is_seed() const noexcept { return (m_tree != NULL && m_id != NONE) && (m_seed.str != nullptr || m_seed.len != (size_t)NONE); } + /** true if the object is not invalid and not in seed state. 
@see the doc for @ref NodeRef */ + inline bool readable() const noexcept { return (m_tree != NULL && m_id != NONE) && (m_seed.str == nullptr && m_seed.len == (size_t)NONE); } - inline void _clear_seed() { /*do this manually or an assert is triggered*/ m_seed.str = nullptr; m_seed.len = NONE; } + RYML_DEPRECATED("use one of readable(), is_seed() or !invalid()") inline bool valid() const { return m_tree != nullptr && m_id != NONE; } /** @} */ @@ -21047,80 +23748,71 @@ class RYML_EXPORT NodeRef : public detail::RoNodeMethods /** @name comparisons */ /** @{ */ - inline bool operator== (NodeRef const& that) const { _C4RV(); RYML_ASSERT(that.valid() && !that.is_seed()); RYML_ASSERT(that.m_tree == m_tree); return m_id == that.m_id; } + bool operator== (NodeRef const& that) const + { + if(m_tree == that.m_tree && m_id == that.m_id) + { + bool seed = is_seed(); + if(seed == that.is_seed()) + { + if(seed) + { + return (m_seed.len == that.m_seed.len) + && (m_seed.str == that.m_seed.str + || m_seed == that.m_seed); // do strcmp only in the last resort + } + return true; + } + } + return false; + } inline bool operator!= (NodeRef const& that) const { return ! this->operator==(that); } - inline bool operator== (ConstNodeRef const& that) const { _C4RV(); RYML_ASSERT(that.valid()); RYML_ASSERT(that.m_tree == m_tree); return m_id == that.m_id; } + inline bool operator== (ConstNodeRef const& that) const { return m_tree == that.m_tree && m_id == that.m_id && !is_seed(); } inline bool operator!= (ConstNodeRef const& that) const { return ! 
this->operator==(that); } - inline bool operator== (std::nullptr_t) const { return m_tree == nullptr || m_id == NONE || is_seed(); } - inline bool operator!= (std::nullptr_t) const { return m_tree != nullptr && m_id != NONE && !is_seed(); } - - inline bool operator== (csubstr val) const { _C4RV(); RYML_ASSERT(has_val()); return m_tree->val(m_id) == val; } - inline bool operator!= (csubstr val) const { _C4RV(); RYML_ASSERT(has_val()); return m_tree->val(m_id) != val; } - - //inline operator bool () const { return m_tree == nullptr || m_id == NONE || is_seed(); } + /** @cond dev */ + RYML_DEPRECATED("use !readable()") bool operator== (std::nullptr_t) const { return m_tree == nullptr || m_id == NONE || is_seed(); } + RYML_DEPRECATED("use readable()") bool operator!= (std::nullptr_t) const { return !(m_tree == nullptr || m_id == NONE || is_seed()); } - /** @} */ + RYML_DEPRECATED("use `this->val() == s`") bool operator== (csubstr s) const { _C4RR(); _RYML_CB_ASSERT(m_tree->m_callbacks, has_val()); return m_tree->val(m_id) == s; } + RYML_DEPRECATED("use `this->val() != s`") bool operator!= (csubstr s) const { _C4RR(); _RYML_CB_ASSERT(m_tree->m_callbacks, has_val()); return m_tree->val(m_id) != s; } + /** @endcond */ public: - /** @name node property getters */ - /** @{ */ + /** @name node_property_getters + * @{ */ - C4_ALWAYS_INLINE C4_PURE Tree * tree() noexcept { return m_tree; } - C4_ALWAYS_INLINE C4_PURE Tree const* tree() const noexcept { return m_tree; } + C4_ALWAYS_INLINE Tree * tree() noexcept { return m_tree; } + C4_ALWAYS_INLINE Tree const* tree() const noexcept { return m_tree; } - C4_ALWAYS_INLINE C4_PURE size_t id() const noexcept { return m_id; } + C4_ALWAYS_INLINE id_type id() const noexcept { return m_id; } /** @} */ public: - /** @name node modifiers */ + /** @name node_modifiers */ /** @{ */ - void change_type(NodeType t) { _C4RV(); m_tree->change_type(m_id, t); } + void create() { _apply_seed(); } - void set_type(NodeType t) { _C4RV(); 
m_tree->_set_flags(m_id, t); } - void set_key(csubstr key) { _C4RV(); m_tree->_set_key(m_id, key); } - void set_val(csubstr val) { _C4RV(); m_tree->_set_val(m_id, val); } - void set_key_tag(csubstr key_tag) { _C4RV(); m_tree->set_key_tag(m_id, key_tag); } - void set_val_tag(csubstr val_tag) { _C4RV(); m_tree->set_val_tag(m_id, val_tag); } - void set_key_anchor(csubstr key_anchor) { _C4RV(); m_tree->set_key_anchor(m_id, key_anchor); } - void set_val_anchor(csubstr val_anchor) { _C4RV(); m_tree->set_val_anchor(m_id, val_anchor); } - void set_key_ref(csubstr key_ref) { _C4RV(); m_tree->set_key_ref(m_id, key_ref); } - void set_val_ref(csubstr val_ref) { _C4RV(); m_tree->set_val_ref(m_id, val_ref); } + void change_type(NodeType t) { _C4RR(); m_tree->change_type(m_id, t); } - template - size_t set_key_serialized(T const& C4_RESTRICT k) - { - _C4RV(); - csubstr s = m_tree->to_arena(k); - m_tree->_set_key(m_id, s); - return s.len; - } - template - size_t set_val_serialized(T const& C4_RESTRICT v) - { - _C4RV(); - csubstr s = m_tree->to_arena(v); - m_tree->_set_val(m_id, s); - return s.len; - } - size_t set_val_serialized(std::nullptr_t) - { - _C4RV(); - m_tree->_set_val(m_id, csubstr{}); - return 0; - } + void set_type(NodeType t) { _apply_seed(); m_tree->_set_flags(m_id, t); } + void set_key(csubstr key) { _apply_seed(); m_tree->_set_key(m_id, key); } + void set_val(csubstr val) { _apply_seed(); m_tree->_set_val(m_id, val); } + void set_key_tag(csubstr key_tag) { _apply_seed(); m_tree->set_key_tag(m_id, key_tag); } + void set_val_tag(csubstr val_tag) { _apply_seed(); m_tree->set_val_tag(m_id, val_tag); } + void set_key_anchor(csubstr key_anchor) { _apply_seed(); m_tree->set_key_anchor(m_id, key_anchor); } + void set_val_anchor(csubstr val_anchor) { _apply_seed(); m_tree->set_val_anchor(m_id, val_anchor); } + void set_key_ref(csubstr key_ref) { _apply_seed(); m_tree->set_key_ref(m_id, key_ref); } + void set_val_ref(csubstr val_ref) { _apply_seed(); 
m_tree->set_val_ref(m_id, val_ref); } - /** encode a blob as base64, then assign the result to the node's key - * @return the size of base64-encoded blob */ - size_t set_key_serialized(fmt::const_base64_wrapper w); - /** encode a blob as base64, then assign the result to the node's val - * @return the size of base64-encoded blob */ - size_t set_val_serialized(fmt::const_base64_wrapper w); + void set_container_style(NodeType_e style) { _C4RR(); m_tree->set_container_style(m_id, style); } + void set_key_style(NodeType_e style) { _C4RR(); m_tree->set_key_style(m_id, style); } + void set_val_style(NodeType_e style) { _C4RR(); m_tree->set_val_style(m_id, style); } public: @@ -21153,8 +23845,6 @@ class RYML_EXPORT NodeRef : public detail::RoNodeMethods m_tree->remove_children(m_id); } - void create() { _apply_seed(); } - inline void operator= (NodeType_e t) { _apply_seed(); @@ -21211,10 +23901,49 @@ class RYML_EXPORT NodeRef : public detail::RoNodeMethods template inline csubstr to_arena(T const& C4_RESTRICT s) { - _C4RV(); + RYML_ASSERT(m_tree); // no need for valid or readable return m_tree->to_arena(s); } + template + size_t set_key_serialized(T const& C4_RESTRICT k) + { + _apply_seed(); + csubstr s = m_tree->to_arena(k); + m_tree->_set_key(m_id, s); + return s.len; + } + size_t set_key_serialized(std::nullptr_t) + { + _apply_seed(); + m_tree->_set_key(m_id, csubstr{}); + return 0; + } + + template + size_t set_val_serialized(T const& C4_RESTRICT v) + { + _apply_seed(); + csubstr s = m_tree->to_arena(v); + m_tree->_set_val(m_id, s); + return s.len; + } + size_t set_val_serialized(std::nullptr_t) + { + _apply_seed(); + m_tree->_set_val(m_id, csubstr{}); + return 0; + } + + /** encode a blob as base64 into the tree's arena, then assign the + * result to the node's key + * @return the size of base64-encoded blob */ + size_t set_key_serialized(fmt::const_base64_wrapper w); + /** encode a blob as base64 into the tree's arena, then assign the + * result to the node's val + 
* @return the size of base64-encoded blob */ + size_t set_val_serialized(fmt::const_base64_wrapper w); + /** serialize a variable, then assign the result to the node's val */ inline NodeRef& operator<< (csubstr s) { @@ -21222,7 +23951,7 @@ class RYML_EXPORT NodeRef : public detail::RoNodeMethods // operator<< for writing a substr to a stream) _apply_seed(); write(this, s); - RYML_ASSERT(val() == s); + _RYML_CB_ASSERT(m_tree->m_callbacks, val() == s); return *this; } @@ -21270,24 +23999,24 @@ class RYML_EXPORT NodeRef : public detail::RoNodeMethods void _apply_seed() { + _C4RID(); if(m_seed.str) // we have a seed key: use it to create the new child { - //RYML_ASSERT(i.key.scalar.empty() || m_key == i.key.scalar || m_key.empty()); m_id = m_tree->append_child(m_id); m_tree->_set_key(m_id, m_seed); m_seed.str = nullptr; - m_seed.len = NONE; + m_seed.len = (size_t)NONE; } - else if(m_seed.len != NONE) // we have a seed index: create a child at that position + else if(m_seed.len != (size_t)NONE) // we have a seed index: create a child at that position { - RYML_ASSERT(m_tree->num_children(m_id) == m_seed.len); + _RYML_CB_ASSERT(m_tree->m_callbacks, (size_t)m_tree->num_children(m_id) == m_seed.len); m_id = m_tree->append_child(m_id); m_seed.str = nullptr; - m_seed.len = NONE; + m_seed.len = (size_t)NONE; } else { - RYML_ASSERT(valid()); + _RYML_CB_ASSERT(m_tree->m_callbacks, readable()); } } @@ -21313,16 +24042,16 @@ class RYML_EXPORT NodeRef : public detail::RoNodeMethods inline NodeRef insert_child(NodeRef after) { - _C4RV(); - RYML_ASSERT(after.m_tree == m_tree); + _C4RR(); + _RYML_CB_ASSERT(m_tree->m_callbacks, after.m_tree == m_tree); NodeRef r(m_tree, m_tree->insert_child(m_id, after.m_id)); return r; } inline NodeRef insert_child(NodeInit const& i, NodeRef after) { - _C4RV(); - RYML_ASSERT(after.m_tree == m_tree); + _C4RR(); + _RYML_CB_ASSERT(m_tree->m_callbacks, after.m_tree == m_tree); NodeRef r(m_tree, m_tree->insert_child(m_id, after.m_id)); r._apply(i); return 
r; @@ -21330,14 +24059,14 @@ class RYML_EXPORT NodeRef : public detail::RoNodeMethods inline NodeRef prepend_child() { - _C4RV(); + _C4RR(); NodeRef r(m_tree, m_tree->insert_child(m_id, NONE)); return r; } inline NodeRef prepend_child(NodeInit const& i) { - _C4RV(); + _C4RR(); NodeRef r(m_tree, m_tree->insert_child(m_id, NONE)); r._apply(i); return r; @@ -21345,33 +24074,31 @@ class RYML_EXPORT NodeRef : public detail::RoNodeMethods inline NodeRef append_child() { - _C4RV(); + _C4RR(); NodeRef r(m_tree, m_tree->append_child(m_id)); return r; } inline NodeRef append_child(NodeInit const& i) { - _C4RV(); + _C4RR(); NodeRef r(m_tree, m_tree->append_child(m_id)); r._apply(i); return r; } -public: - inline NodeRef insert_sibling(ConstNodeRef const& after) { - _C4RV(); - RYML_ASSERT(after.m_tree == m_tree); + _C4RR(); + _RYML_CB_ASSERT(m_tree->m_callbacks, after.m_tree == m_tree); NodeRef r(m_tree, m_tree->insert_sibling(m_id, after.m_id)); return r; } inline NodeRef insert_sibling(NodeInit const& i, ConstNodeRef const& after) { - _C4RV(); - RYML_ASSERT(after.m_tree == m_tree); + _C4RR(); + _RYML_CB_ASSERT(m_tree->m_callbacks, after.m_tree == m_tree); NodeRef r(m_tree, m_tree->insert_sibling(m_id, after.m_id)); r._apply(i); return r; @@ -21379,14 +24106,14 @@ class RYML_EXPORT NodeRef : public detail::RoNodeMethods inline NodeRef prepend_sibling() { - _C4RV(); + _C4RR(); NodeRef r(m_tree, m_tree->prepend_sibling(m_id)); return r; } inline NodeRef prepend_sibling(NodeInit const& i) { - _C4RV(); + _C4RR(); NodeRef r(m_tree, m_tree->prepend_sibling(m_id)); r._apply(i); return r; @@ -21394,14 +24121,14 @@ class RYML_EXPORT NodeRef : public detail::RoNodeMethods inline NodeRef append_sibling() { - _C4RV(); + _C4RR(); NodeRef r(m_tree, m_tree->append_sibling(m_id)); return r; } inline NodeRef append_sibling(NodeInit const& i) { - _C4RV(); + _C4RR(); NodeRef r(m_tree, m_tree->append_sibling(m_id)); r._apply(i); return r; @@ -21411,29 +24138,29 @@ class RYML_EXPORT NodeRef : 
public detail::RoNodeMethods inline void remove_child(NodeRef & child) { - _C4RV(); - RYML_ASSERT(has_child(child)); - RYML_ASSERT(child.parent().id() == id()); + _C4RR(); + _RYML_CB_ASSERT(m_tree->m_callbacks, has_child(child)); + _RYML_CB_ASSERT(m_tree->m_callbacks, child.parent().id() == id()); m_tree->remove(child.id()); child.clear(); } //! remove the nth child of this node - inline void remove_child(size_t pos) + inline void remove_child(id_type pos) { - _C4RV(); - RYML_ASSERT(pos >= 0 && pos < num_children()); - size_t child = m_tree->child(m_id, pos); - RYML_ASSERT(child != NONE); + _C4RR(); + _RYML_CB_ASSERT(m_tree->m_callbacks, pos >= 0 && pos < num_children()); + id_type child = m_tree->child(m_id, pos); + _RYML_CB_ASSERT(m_tree->m_callbacks, child != NONE); m_tree->remove(child); } //! remove a child by name inline void remove_child(csubstr key) { - _C4RV(); - size_t child = m_tree->find_child(m_id, key); - RYML_ASSERT(child != NONE); + _C4RR(); + id_type child = m_tree->find_child(m_id, key); + _RYML_CB_ASSERT(m_tree->m_callbacks, child != NONE); m_tree->remove(child); } @@ -21445,7 +24172,7 @@ class RYML_EXPORT NodeRef : public detail::RoNodeMethods * `n.move({})`. */ inline void move(ConstNodeRef const& after) { - _C4RV(); + _C4RR(); m_tree->move(m_id, after.m_id); } @@ -21455,7 +24182,7 @@ class RYML_EXPORT NodeRef : public detail::RoNodeMethods * pointer is reset to the tree of the parent node. */ inline void move(NodeRef const& parent, ConstNodeRef const& after) { - _C4RV(); + _C4RR(); if(parent.m_tree == m_tree) { m_tree->move(m_id, parent.m_id, after.m_id); @@ -21473,9 +24200,9 @@ class RYML_EXPORT NodeRef : public detail::RoNodeMethods * default-constructed reference like this: `n.move({})`. 
*/ inline NodeRef duplicate(ConstNodeRef const& after) const { - _C4RV(); - RYML_ASSERT(m_tree == after.m_tree || after.m_id == NONE); - size_t dup = m_tree->duplicate(m_id, m_tree->parent(m_id), after.m_id); + _C4RR(); + _RYML_CB_ASSERT(m_tree->m_callbacks, m_tree == after.m_tree || after.m_id == NONE); + id_type dup = m_tree->duplicate(m_id, m_tree->parent(m_id), after.m_id); NodeRef r(m_tree, dup); return r; } @@ -21487,17 +24214,17 @@ class RYML_EXPORT NodeRef : public detail::RoNodeMethods * this: `n.move({})`. */ inline NodeRef duplicate(NodeRef const& parent, ConstNodeRef const& after) const { - _C4RV(); - RYML_ASSERT(parent.m_tree == after.m_tree || after.m_id == NONE); + _C4RR(); + _RYML_CB_ASSERT(m_tree->m_callbacks, parent.m_tree == after.m_tree || after.m_id == NONE); if(parent.m_tree == m_tree) { - size_t dup = m_tree->duplicate(m_id, parent.m_id, after.m_id); + id_type dup = m_tree->duplicate(m_id, parent.m_id, after.m_id); NodeRef r(m_tree, dup); return r; } else { - size_t dup = parent.m_tree->duplicate(m_tree, m_id, parent.m_id, after.m_id); + id_type dup = parent.m_tree->duplicate(m_tree, m_id, parent.m_id, after.m_id); NodeRef r(parent.m_tree, dup); return r; } @@ -21505,8 +24232,8 @@ class RYML_EXPORT NodeRef : public detail::RoNodeMethods inline void duplicate_children(NodeRef const& parent, ConstNodeRef const& after) const { - _C4RV(); - RYML_ASSERT(parent.m_tree == after.m_tree); + _C4RR(); + _RYML_CB_ASSERT(m_tree->m_callbacks, parent.m_tree == after.m_tree); if(parent.m_tree == m_tree) { m_tree->duplicate_children(m_id, parent.m_id, after.m_id); @@ -21519,85 +24246,178 @@ class RYML_EXPORT NodeRef : public detail::RoNodeMethods /** @} */ -#undef _C4RV +#undef _C4RR +#undef _C4RID }; //----------------------------------------------------------------------------- -inline ConstNodeRef::ConstNodeRef(NodeRef const& that) +inline ConstNodeRef::ConstNodeRef(NodeRef const& that) noexcept : m_tree(that.m_tree) - , m_id(!that.is_seed() ? 
that.id() : NONE) + , m_id(!that.is_seed() ? that.id() : (id_type)NONE) { } -inline ConstNodeRef::ConstNodeRef(NodeRef && that) +inline ConstNodeRef::ConstNodeRef(NodeRef && that) noexcept : m_tree(that.m_tree) - , m_id(!that.is_seed() ? that.id() : NONE) + , m_id(!that.is_seed() ? that.id() : (id_type)NONE) { } -inline ConstNodeRef& ConstNodeRef::operator= (NodeRef const& that) +inline ConstNodeRef& ConstNodeRef::operator= (NodeRef const& that) noexcept { m_tree = (that.m_tree); - m_id = (!that.is_seed() ? that.id() : NONE); + m_id = (!that.is_seed() ? that.id() : (id_type)NONE); return *this; } -inline ConstNodeRef& ConstNodeRef::operator= (NodeRef && that) +inline ConstNodeRef& ConstNodeRef::operator= (NodeRef && that) noexcept { m_tree = (that.m_tree); - m_id = (!that.is_seed() ? that.id() : NONE); + m_id = (!that.is_seed() ? that.id() : (id_type)NONE); return *this; } //----------------------------------------------------------------------------- +/** @addtogroup doc_serialization_helpers + * + * @{ + */ + template inline void write(NodeRef *n, T const& v) { n->set_val_serialized(v); } +namespace detail { +// SFINAE overloads for skipping leading + which cannot be read by the charconv functions template -typename std::enable_if< ! std::is_floating_point::value, bool>::type -inline read(NodeRef const& n, T *v) +C4_ALWAYS_INLINE auto read_skip_plus(csubstr val, T *v) + -> typename std::enable_if::value, bool>::type { - return from_chars(n.val(), v); + if(val.begins_with('+')) + val = val.sub(1); + return from_chars(val, v); } template -typename std::enable_if< ! std::is_floating_point::value, bool>::type -inline read(ConstNodeRef const& n, T *v) +C4_ALWAYS_INLINE auto read_skip_plus(csubstr val, T *v) + -> typename std::enable_if< ! std::is_arithmetic::value, bool>::type +{ + return from_chars(val, v); +} +} // namespace detail + +/** convert the val of a scalar node to a particular type, by + * forwarding its val to @ref from_chars(). 
The full string is + * used. + * @return false if the conversion failed */ +template +inline auto read(NodeRef const& n, T *v) + -> typename std::enable_if< ! std::is_floating_point::value, bool>::type +{ + csubstr val = n.val(); + if(val.empty()) + return false; + return detail::read_skip_plus(val, v); +} +/** convert the val of a scalar node to a particular type, by + * forwarding its val to @ref from_chars(). The full string is + * used. + * @return false if the conversion failed */ +template +inline auto read(ConstNodeRef const& n, T *v) + -> typename std::enable_if< ! std::is_floating_point::value, bool>::type { - return from_chars(n.val(), v); + csubstr val = n.val(); + if(val.empty()) + return false; + return detail::read_skip_plus(val, v); } +/** convert the val of a scalar node to a floating point type, by + * forwarding its val to @ref from_chars_float(). + * + * @return false if the conversion failed + * + * @warning Unlike non-floating types, only the leading part of the + * string that may constitute a number is processed. This happens + * because the float parsing is delegated to fast_float, which is + * implemented that way. Consequently, for example, all of `"34"`, + * `"34 "` `"34hg"` `"34 gh"` will be read as 34. If you are not sure + * about the contents of the data, you can use + * csubstr::first_real_span() to check before calling `>>`, for + * example like this: + * + * ```cpp + * csubstr val = node.val(); + * if(val.first_real_span() == val) + * node >> v; + * else + * ERROR("not a real") + * ``` + */ template typename std::enable_if::value, bool>::type inline read(NodeRef const& n, T *v) { - return from_chars_float(n.val(), v); + csubstr val = n.val(); + if(val.empty()) + return false; + return from_chars_float(val, v); } +/** convert the val of a scalar node to a floating point type, by + * forwarding its val to @ref from_chars_float(). 
+ * + * @return false if the conversion failed + * + * @warning Unlike non-floating types, only the leading part of the + * string that may constitute a number is processed. This happens + * because the float parsing is delegated to fast_float, which is + * implemented that way. Consequently, for example, all of `"34"`, + * `"34 "` `"34hg"` `"34 gh"` will be read as 34. If you are not sure + * about the contents of the data, you can use + * csubstr::first_real_span() to check before calling `>>`, for + * example like this: + * + * ```cpp + * csubstr val = node.val(); + * if(val.first_real_span() == val) + * node >> v; + * else + * ERROR("not a real") + * ``` + */ template typename std::enable_if::value, bool>::type inline read(ConstNodeRef const& n, T *v) { - return from_chars_float(n.val(), v); + csubstr val = n.val(); + if(val.empty()) + return false; + return from_chars_float(val, v); } +/** @} */ + +/** @} */ + } // namespace yml } // namespace c4 -#if defined(_MSC_VER) -# pragma warning(pop) -#endif -#ifdef __GNUC__ +#ifdef __clang__ +# pragma clang diagnostic pop +#elif defined(__GNUC__) # pragma GCC diagnostic pop +#elif defined(_MSC_VER) +# pragma warning(pop) #endif #endif /* _C4_YML_NODE_HPP_ */ @@ -21637,17 +24457,14 @@ inline read(ConstNodeRef const& n, T *v) namespace c4 { namespace yml { +/** @addtogroup doc_emit + * @{ + */ -/** Repeat-Character: a character to be written a number of times. 
*/ -struct RepC -{ - char c; - size_t num_times; -}; -inline RepC indent_to(size_t num_levels) -{ - return {' ', size_t(2) * num_levels}; -} +/** @defgroup doc_writers Writer objects to use with an Emitter + * @see Emitter + * @{ + */ //----------------------------------------------------------------------------- @@ -21701,13 +24518,11 @@ struct WriterFile ++m_pos; } - inline void _do_write(RepC const rc) + inline void _do_write(const char c, size_t num_times) { - for(size_t i = 0; i < rc.num_times; ++i) - { - fputc(rc.c, m_file); - } - m_pos += rc.num_times; + for(size_t i = 0; i < num_times; ++i) + fputc(c, m_file); + m_pos += num_times; } }; @@ -21764,13 +24579,11 @@ struct WriterOStream ++m_pos; } - inline void _do_write(RepC const rc) + inline void _do_write(const char c, size_t num_times) { - for(size_t i = 0; i < rc.num_times; ++i) - { - m_stream.put(rc.c); - } - m_pos += rc.num_times; + for(size_t i = 0; i < num_times; ++i) + m_stream.put(c); + m_pos += num_times; } }; @@ -21827,25 +24640,23 @@ struct WriterBuf inline void _do_write(const char c) { if(m_pos + 1 <= m_buf.len) - { m_buf[m_pos] = c; - } ++m_pos; } - inline void _do_write(RepC const rc) + inline void _do_write(const char c, size_t num_times) { - if(m_pos + rc.num_times <= m_buf.len) - { - for(size_t i = 0; i < rc.num_times; ++i) - { - m_buf[m_pos + i] = rc.c; - } - } - m_pos += rc.num_times; + if(m_pos + num_times <= m_buf.len) + for(size_t i = 0; i < num_times; ++i) + m_buf[m_pos + i] = c; + m_pos += num_times; } }; +/** @} */ + +/** @} */ + } // namespace yml } // namespace c4 @@ -21870,8 +24681,12 @@ struct WriterBuf #ifndef _C4_YML_COMMON_HPP_ #include "../common.hpp" #endif + +#ifdef RYML_DBG //included above: //#include +#endif + //----------------------------------------------------------------------------- // some debugging scaffolds @@ -21890,8 +24705,44 @@ struct WriterBuf #pragma clang diagnostic ignored "-Werror" #pragma clang diagnostic ignored
"-Wgnu-zero-variadic-macro-arguments" -// some debugging scaffolds + +#ifndef RYML_DBG +# define _c4err(fmt, ...) \ + this->_err("ERROR: " fmt, ## __VA_ARGS__) +# define _c4dbgt(fmt, ...) +# define _c4dbgpf(fmt, ...) +# define _c4dbgpf_(fmt, ...) +# define _c4dbgp(msg) +# define _c4dbgp_(msg) +# define _c4dbgq(msg) +# define _c4presc(...) +# define _c4prscalar(msg, scalar, keep_newlines) +#else +# define _c4err(fmt, ...) \ + do { RYML_DEBUG_BREAK(); this->_err("ERROR:\n" "{}:{}: " fmt, __FILE__, __LINE__, ## __VA_ARGS__); } while(0) +# define _c4dbgt(fmt, ...) do { if(_dbg_enabled()) { \ + this->_dbg ("{}:{}: " fmt , __FILE__, __LINE__, ## __VA_ARGS__); } } while(0) +# define _c4dbgpf(fmt, ...) _dbg_printf("{}:{}: " fmt "\n", __FILE__, __LINE__, ## __VA_ARGS__) +# define _c4dbgpf_(fmt, ...) _dbg_printf("{}:{}: " fmt , __FILE__, __LINE__, ## __VA_ARGS__) +# define _c4dbgp(msg) _dbg_printf("{}:{}: " msg "\n", __FILE__, __LINE__ ) +# define _c4dbgp_(msg) _dbg_printf("{}:{}: " msg , __FILE__, __LINE__ ) +# define _c4dbgq(msg) _dbg_printf(msg "\n") +# define _c4presc(...) 
do { if(_dbg_enabled()) __c4presc(__VA_ARGS__); } while(0) +# define _c4prscalar(msg, scalar, keep_newlines) \ + do { \ + _c4dbgpf_("{}: [{}]~~~", msg, scalar.len); \ + if(_dbg_enabled()) { \ + __c4presc((scalar).str, (scalar).len, (keep_newlines)); \ + } \ + _c4dbgq("~~~"); \ + } while(0) +#endif // RYML_DBG + + +//----------------------------------------------------------------------------- + #ifdef RYML_DBG + // amalgamate: removed include of // https://github.com/biojppm/rapidyaml/src/c4/dump.hpp //#include @@ -21900,104 +24751,83 @@ struct WriterBuf #endif /* C4_DUMP_HPP_ */ namespace c4 { -inline void _dbg_dumper(csubstr s) { fwrite(s.str, 1, s.len, stdout); }; +inline bool& _dbg_enabled() { static bool enabled = true; return enabled; } +inline void _dbg_set_enabled(bool yes) { _dbg_enabled() = yes; } +inline void _dbg_dumper(csubstr s) +{ + if(s.str) + fwrite(s.str, 1, s.len, stdout); +} +inline substr _dbg_buf() noexcept +{ + static char writebuf[2048]; + return writebuf; +} template -void _dbg_printf(c4::csubstr fmt, Args&& ...args) +C4_NO_INLINE void _dbg_printf(c4::csubstr fmt, Args const& ...args) { - static char writebuf[256]; - auto results = c4::format_dump_resume<&_dbg_dumper>(writebuf, fmt, std::forward(args)...); - // resume writing if the results failed to fit the buffer - if(C4_UNLIKELY(results.bufsize > sizeof(writebuf))) // bufsize will be that of the largest element serialized. Eg int(1), will require 1 byte. + if(_dbg_enabled()) { - results = format_dump_resume<&_dbg_dumper>(results, writebuf, fmt, std::forward(args)...); - if(C4_UNLIKELY(results.bufsize > sizeof(writebuf))) - { - results = format_dump_resume<&_dbg_dumper>(results, writebuf, fmt, std::forward(args)...); - } + substr buf = _dbg_buf(); + const size_t needed_size = c4::format_dump(&_dbg_dumper, buf, fmt, args...); + C4_CHECK(needed_size <= buf.len); } } -} // namespace c4 - -# define _c4dbgt(fmt, ...) 
this->_dbg ("{}:{}: " fmt , __FILE__, __LINE__, ## __VA_ARGS__) -# define _c4dbgpf(fmt, ...) _dbg_printf("{}:{}: " fmt "\n", __FILE__, __LINE__, ## __VA_ARGS__) -# define _c4dbgp(msg) _dbg_printf("{}:{}: " msg "\n", __FILE__, __LINE__ ) -# define _c4dbgq(msg) _dbg_printf(msg "\n") -# define _c4err(fmt, ...) \ - do { if(c4::is_debugger_attached()) { C4_DEBUG_BREAK(); } \ - this->_err("ERROR:\n" "{}:{}: " fmt, __FILE__, __LINE__, ## __VA_ARGS__); } while(0) -#else -# define _c4dbgt(fmt, ...) -# define _c4dbgpf(fmt, ...) -# define _c4dbgp(msg) -# define _c4dbgq(msg) -# define _c4err(fmt, ...) \ - do { if(c4::is_debugger_attached()) { C4_DEBUG_BREAK(); } \ - this->_err("ERROR: " fmt, ## __VA_ARGS__); } while(0) -#endif - -#define _c4prsp(sp) sp -#define _c4presc(s) __c4presc(s.str, s.len) -inline c4::csubstr _c4prc(const char &C4_RESTRICT c) -{ - switch(c) - { - case '\n': return c4::csubstr("\\n"); - case '\t': return c4::csubstr("\\t"); - case '\0': return c4::csubstr("\\0"); - case '\r': return c4::csubstr("\\r"); - case '\f': return c4::csubstr("\\f"); - case '\b': return c4::csubstr("\\b"); - case '\v': return c4::csubstr("\\v"); - case '\a': return c4::csubstr("\\a"); - default: return c4::csubstr(&c, 1); - } -} -inline void __c4presc(const char *s, size_t len) +inline void __c4presc(const char *s, size_t len, bool keep_newlines=false) { + RYML_ASSERT(s || !len); size_t prev = 0; for(size_t i = 0; i < len; ++i) { switch(s[i]) { - case '\n' : fwrite(s+prev, 1, i-prev, stdout); putchar('\\'); putchar('n'); putchar('\n'); prev = i+1; break; - case '\t' : fwrite(s+prev, 1, i-prev, stdout); putchar('\\'); putchar('t'); prev = i+1; break; - case '\0' : fwrite(s+prev, 1, i-prev, stdout); putchar('\\'); putchar('0'); prev = i+1; break; - case '\r' : fwrite(s+prev, 1, i-prev, stdout); putchar('\\'); putchar('r'); prev = i+1; break; - case '\f' : fwrite(s+prev, 1, i-prev, stdout); putchar('\\'); putchar('f'); prev = i+1; break; - case '\b' : fwrite(s+prev, 1, i-prev, 
stdout); putchar('\\'); putchar('b'); prev = i+1; break; - case '\v' : fwrite(s+prev, 1, i-prev, stdout); putchar('\\'); putchar('v'); prev = i+1; break; - case '\a' : fwrite(s+prev, 1, i-prev, stdout); putchar('\\'); putchar('a'); prev = i+1; break; - case '\x1b': fwrite(s+prev, 1, i-prev, stdout); putchar('\\'); putchar('e'); prev = i+1; break; + case '\n' : _dbg_printf("{}{}{}", csubstr(s+prev, i-prev), csubstr("\\n"), csubstr(keep_newlines ? "\n":"")); prev = i+1; break; + case '\t' : _dbg_printf("{}{}", csubstr(s+prev, i-prev), csubstr("\\t")); prev = i+1; break; + case '\0' : _dbg_printf("{}{}", csubstr(s+prev, i-prev), csubstr("\\0")); prev = i+1; break; + case '\r' : _dbg_printf("{}{}", csubstr(s+prev, i-prev), csubstr("\\r")); prev = i+1; break; + case '\f' : _dbg_printf("{}{}", csubstr(s+prev, i-prev), csubstr("\\f")); prev = i+1; break; + case '\b' : _dbg_printf("{}{}", csubstr(s+prev, i-prev), csubstr("\\b")); prev = i+1; break; + case '\v' : _dbg_printf("{}{}", csubstr(s+prev, i-prev), csubstr("\\v")); prev = i+1; break; + case '\a' : _dbg_printf("{}{}", csubstr(s+prev, i-prev), csubstr("\\a")); prev = i+1; break; + case '\x1b': _dbg_printf("{}{}", csubstr(s+prev, i-prev), csubstr("\\x1b")); prev = i+1; break; case -0x3e/*0xc2u*/: if(i+1 < len) { if(s[i+1] == -0x60/*0xa0u*/) { - fwrite(s+prev, 1, i-prev, stdout); putchar('\\'); putchar('_'); prev = i+2; ++i; + _dbg_printf("{}{}", csubstr(s+prev, i-prev), csubstr("\\_")); prev = i+1; } else if(s[i+1] == -0x7b/*0x85u*/) { - fwrite(s+prev, 1, i-prev, stdout); putchar('\\'); putchar('N'); prev = i+2; ++i; + _dbg_printf("{}{}", csubstr(s+prev, i-prev), csubstr("\\N")); prev = i+1; } - break; } + break; case -0x1e/*0xe2u*/: if(i+2 < len && s[i+1] == -0x80/*0x80u*/) { if(s[i+2] == -0x58/*0xa8u*/) { - fwrite(s+prev, 1, i-prev, stdout); putchar('\\'); putchar('L'); prev = i+3; i += 2; + _dbg_printf("{}{}", csubstr(s+prev, i-prev), csubstr("\\L")); prev = i+1; } else if(s[i+2] == -0x57/*0xa9u*/) { - 
fwrite(s+prev, 1, i-prev, stdout); putchar('\\'); putchar('P'); prev = i+3; i += 2; + _dbg_printf("{}{}", csubstr(s+prev, i-prev), csubstr("\\P")); prev = i+1; } - break; } + break; } } - fwrite(s + prev, 1, len - prev, stdout); + if(len > prev) + _dbg_printf("{}", csubstr(s+prev, len-prev)); +} +inline void __c4presc(csubstr s, bool keep_newlines=false) +{ + __c4presc(s.str, s.len, keep_newlines); } +} // namespace c4 + +#endif // RYML_DBG #pragma clang diagnostic pop #pragma GCC diagnostic pop @@ -22006,7 +24836,6 @@ inline void __c4presc(const char *s, size_t len) # pragma warning(pop) #endif - #endif /* _C4_YML_DETAIL_PARSER_DBG_HPP_ */ @@ -22026,6 +24855,8 @@ inline void __c4presc(const char *s, size_t len) #ifndef _C4_YML_EMIT_HPP_ #define _C4_YML_EMIT_HPP_ +/** @file emit.hpp Utilities to emit YAML and JSON. */ + #ifndef _C4_YML_WRITER_HPP_ #include "./writer.hpp" #endif @@ -22038,14 +24869,21 @@ inline void __c4presc(const char *s, size_t len) #include "./node.hpp" #endif - #define RYML_DEPRECATE_EMIT \ - RYML_DEPRECATED("use emit_yaml() instead. See https://github.com/biojppm/rapidyaml/issues/120") + RYML_DEPRECATED("use emit_yaml() instead. " \ + "See https://github.com/biojppm/rapidyaml/issues/120") +#define RYML_DEPRECATE_EMITRS \ + RYML_DEPRECATED("use emitrs_yaml() instead. " \ + "See https://github.com/biojppm/rapidyaml/issues/120") + #ifdef emit -#error "emit is defined, likely from a Qt include. This will cause a compilation error. See https://github.com/biojppm/rapidyaml/issues/120" +#error "emit is defined, likely from a Qt include. " \ + "This will cause a compilation error. " \ + "See https://github.com/biojppm/rapidyaml/issues/120" #endif -#define RYML_DEPRECATE_EMITRS \ - RYML_DEPRECATED("use emitrs_yaml() instead. 
See https://github.com/biojppm/rapidyaml/issues/120") + + +C4_SUPPRESS_WARNING_GCC_CLANG_WITH_PUSH("-Wold-style-cast") //----------------------------------------------------------------------------- @@ -22055,27 +24893,84 @@ inline void __c4presc(const char *s, size_t len) namespace c4 { namespace yml { -template class Emitter; +/** @addtogroup doc_emit + * + * @{ + */ +// fwd declarations +template class Emitter; template using EmitterOStream = Emitter>; using EmitterFile = Emitter; using EmitterBuf = Emitter; +namespace detail { +inline bool is_set_(ConstNodeRef n) { return n.tree() && (n.id() != NONE); } +} + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +/** Specifies the type of content to emit */ typedef enum { - EMIT_YAML = 0, - EMIT_JSON = 1 + EMIT_YAML = 0, ///< emit YAML + EMIT_JSON = 1 ///< emit JSON } EmitType_e; -/** mark a tree or node to be emitted as json */ -struct as_json +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +/** A lightweight object containing options to be used when emitting. */ +struct EmitOptions { - Tree const* tree; - size_t node; - as_json(Tree const& t) : tree(&t), node(t.empty() ? 
NONE : t.root_id()) {} - as_json(Tree const& t, size_t id) : tree(&t), node(id) {} - as_json(ConstNodeRef const& n) : tree(n.tree()), node(n.id()) {} + typedef enum : uint32_t { + DEFAULT_FLAGS = 0, + JSON_ERR_ON_TAG = 1 << 0, + JSON_ERR_ON_ANCHOR = 1 << 1, + _JSON_ERR_MASK = JSON_ERR_ON_TAG|JSON_ERR_ON_ANCHOR, + } EmitOptionFlags_e; + +public: + + /** @name option flags + * + * @{ */ + C4_ALWAYS_INLINE EmitOptionFlags_e json_error_flags() const noexcept { return m_option_flags; } + EmitOptions& json_error_flags(EmitOptionFlags_e d) noexcept { m_option_flags = (EmitOptionFlags_e)(d & _JSON_ERR_MASK); return *this; } + /** @} */ + +public: + + /** @name max depth for the emitted tree + * + * This makes the emitter fail when emitting trees exceeding the + * max_depth. + * + * @{ */ + C4_ALWAYS_INLINE id_type max_depth() const noexcept { return m_max_depth; } + EmitOptions& max_depth(id_type d) noexcept { m_max_depth = d; return *this; } + static constexpr const id_type max_depth_default = 64; + /** @} */ + +public: + + bool operator== (const EmitOptions& that) const noexcept + { + return m_max_depth == that.m_max_depth && + m_option_flags == that.m_option_flags; + } + +private: + + /** @cond dev */ + id_type m_max_depth{max_depth_default}; + EmitOptionFlags_e m_option_flags{DEFAULT_FLAGS}; + /** @endcond */ }; @@ -22083,52 +24978,101 @@ struct as_json //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- +/** A stateful emitter, for use with a writer such as @ref WriterBuf, + * @ref WriterFile, or @ref WriterOStream */ template class Emitter : public Writer { public: - using Writer::Writer; + /** Construct the emitter and its internal Writer state, using default emit options. + * @param args arguments to be forwarded to the constructor of the writer. 
+ * */ + template + Emitter(Args &&...args) : Writer(std::forward(args)...), m_tree(), m_opts(), m_flow(false) {} + + /** Construct the emitter and its internal Writer state. + * + * @param opts EmitOptions + * @param args arguments to be forwarded to the constructor of the writer. + * */ + template + Emitter(EmitOptions const& opts, Args &&...args) : Writer(std::forward(args)...), m_tree(), m_opts(opts), m_flow(false) {} /** emit! * * When writing to a buffer, returns a substr of the emitted YAML. - * If the given buffer has insufficient space, the returned span will - * be null and its size will be the needed space. No writes are done - * after the end of the buffer. + * If the given buffer has insufficient space, the returned substr + * will be null and its size will be the needed space. Whatever + * the size of the buffer, it is guaranteed that no writes are + * done past its end. * * When writing to a file, the returned substr will be null, but its - * length will be set to the number of bytes written. */ - substr emit_as(EmitType_e type, Tree const& t, size_t id, bool error_on_excess); + * length will be set to the number of bytes written. 
+ * + * @param type specify what to emit + * @param t the tree to emit + * @param id the id of the node to emit + * @param error_on_excess when true, an error is raised when the + * output buffer is too small for the emitted YAML/JSON + * */ + substr emit_as(EmitType_e type, Tree const& t, id_type id, bool error_on_excess); /** emit starting at the root node */ - substr emit_as(EmitType_e type, Tree const& t, bool error_on_excess=true); - /** emit the given node */ - substr emit_as(EmitType_e type, ConstNodeRef const& n, bool error_on_excess=true); + substr emit_as(EmitType_e type, Tree const& t, bool error_on_excess=true) + { + if(t.empty()) + return {}; + return this->emit_as(type, t, t.root_id(), error_on_excess); + } + /** emit starting at the given node */ + substr emit_as(EmitType_e type, ConstNodeRef const& n, bool error_on_excess=true) + { + if(!detail::is_set_(n)) + return {}; + _RYML_CB_CHECK(n.tree()->callbacks(), n.readable()); + return this->emit_as(type, *n.tree(), n.id(), error_on_excess); + } + +public: + + /** get the emit options for this object */ + EmitOptions const& options() const noexcept { return m_opts; } + + /** set the max depth for emitted trees (to prevent a stack overflow) */ + void max_depth(id_type max_depth) noexcept { m_opts.max_depth(max_depth); } + /** get the max depth for emitted trees (to prevent a stack overflow) */ + id_type max_depth() const noexcept { return m_opts.max_depth(); } private: Tree const* C4_RESTRICT m_tree; + EmitOptions m_opts; + bool m_flow; + +private: - void _emit_yaml(size_t id); - void _do_visit_flow_sl(size_t id, size_t ilevel=0); - void _do_visit_flow_ml(size_t id, size_t ilevel=0, size_t do_indent=1); - void _do_visit_block(size_t id, size_t ilevel=0, size_t do_indent=1); - void _do_visit_block_container(size_t id, size_t next_level, size_t do_indent); - void _do_visit_json(size_t id); + void _emit_yaml(id_type id); + void _do_visit_flow_sl(id_type id, id_type depth, id_type ilevel=0); + void 
_do_visit_flow_ml(id_type id, id_type depth, id_type ilevel=0, id_type do_indent=1); + void _do_visit_block(id_type id, id_type depth, id_type ilevel=0, id_type do_indent=1); + void _do_visit_block_container(id_type id, id_type depth, id_type next_level, bool do_indent); + void _do_visit_json(id_type id, id_type depth); private: - void _write(NodeScalar const& C4_RESTRICT sc, NodeType flags, size_t level); + void _write(NodeScalar const& C4_RESTRICT sc, NodeType flags, id_type level); void _write_json(NodeScalar const& C4_RESTRICT sc, NodeType flags); - void _write_doc(size_t id); - void _write_scalar(csubstr s, bool was_quoted); - void _write_scalar_json(csubstr s, bool as_key, bool was_quoted); - void _write_scalar_literal(csubstr s, size_t level, bool as_key, bool explicit_indentation=false); - void _write_scalar_folded(csubstr s, size_t level, bool as_key); - void _write_scalar_squo(csubstr s, size_t level); - void _write_scalar_dquo(csubstr s, size_t level); - void _write_scalar_plain(csubstr s, size_t level); + void _write_doc(id_type id); + void _write_scalar_json_dquo(csubstr s); + void _write_scalar_literal(csubstr s, id_type level, bool as_key); + void _write_scalar_folded(csubstr s, id_type level, bool as_key); + void _write_scalar_squo(csubstr s, id_type level); + void _write_scalar_dquo(csubstr s, id_type level); + void _write_scalar_plain(csubstr s, id_type level); + + size_t _write_escaped_newlines(csubstr s, size_t i); + size_t _write_indented_block(csubstr s, size_t i, id_type level); void _write_tag(csubstr tag) { @@ -22138,18 +25082,28 @@ class Emitter : public Writer } enum : type_bits { - _keysc = (KEY|KEYREF|KEYANCH|KEYQUO|_WIP_KEY_STYLE) | ~(VAL|VALREF|VALANCH|VALQUO|_WIP_VAL_STYLE), - _valsc = ~(KEY|KEYREF|KEYANCH|KEYQUO|_WIP_KEY_STYLE) | (VAL|VALREF|VALANCH|VALQUO|_WIP_VAL_STYLE), + _keysc = (KEY|KEYREF|KEYANCH|KEYQUO|KEY_STYLE) | ~(VAL|VALREF|VALANCH|VALQUO|VAL_STYLE) | CONTAINER_STYLE, + _valsc = ~(KEY|KEYREF|KEYANCH|KEYQUO|KEY_STYLE) | 
(VAL|VALREF|VALANCH|VALQUO|VAL_STYLE) | CONTAINER_STYLE, _keysc_json = (KEY) | ~(VAL), _valsc_json = ~(KEY) | (VAL), }; - C4_ALWAYS_INLINE void _writek(size_t id, size_t level) { _write(m_tree->keysc(id), m_tree->_p(id)->m_type.type & ~_valsc, level); } - C4_ALWAYS_INLINE void _writev(size_t id, size_t level) { _write(m_tree->valsc(id), m_tree->_p(id)->m_type.type & ~_keysc, level); } + C4_ALWAYS_INLINE void _writek(id_type id, id_type level) { _write(m_tree->keysc(id), (m_tree->_p(id)->m_type.type & ~_valsc), level); } + C4_ALWAYS_INLINE void _writev(id_type id, id_type level) { _write(m_tree->valsc(id), (m_tree->_p(id)->m_type.type & ~_keysc), level); } - C4_ALWAYS_INLINE void _writek_json(size_t id) { _write_json(m_tree->keysc(id), m_tree->_p(id)->m_type.type & ~(VAL)); } - C4_ALWAYS_INLINE void _writev_json(size_t id) { _write_json(m_tree->valsc(id), m_tree->_p(id)->m_type.type & ~(KEY)); } + C4_ALWAYS_INLINE void _writek_json(id_type id) { _write_json(m_tree->keysc(id), m_tree->_p(id)->m_type.type & ~(VAL)); } + C4_ALWAYS_INLINE void _writev_json(id_type id) { _write_json(m_tree->valsc(id), m_tree->_p(id)->m_type.type & ~(KEY)); } + void _indent(id_type level, bool enabled) + { + if(enabled) + this->Writer::_do_write(' ', 2u * (size_t)level); + } + void _indent(id_type level) + { + if(!m_flow) + this->Writer::_do_write(' ', 2u * (size_t)level); + } }; @@ -22157,43 +25111,65 @@ class Emitter : public Writer //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- -/** emit YAML to the given file. A null file defaults to stdout. - * Return the number of bytes written. */ -inline size_t emit_yaml(Tree const& t, size_t id, FILE *f) +/** @defgroup doc_emit_to_file Emit to file + * + * @{ + */ + + +// emit from tree and node id ----------------------- + +/** (1) emit YAML to the given file, starting at the given node. A null + * file defaults to stdout. 
Return the number of bytes written. */ +inline size_t emit_yaml(Tree const& t, id_type id, EmitOptions const& opts, FILE *f) +{ + EmitterFile em(opts, f); + return em.emit_as(EMIT_YAML, t, id, /*error_on_excess*/true).len; +} +/** (2) like (1), but use default emit options */ +inline size_t emit_yaml(Tree const& t, id_type id, FILE *f) { EmitterFile em(f); return em.emit_as(EMIT_YAML, t, id, /*error_on_excess*/true).len; } -RYML_DEPRECATE_EMIT inline size_t emit(Tree const& t, size_t id, FILE *f) +/** (1) emit JSON to the given file, starting at the given node. A null + * file defaults to stdout. Return the number of bytes written. */ +inline size_t emit_json(Tree const& t, id_type id, EmitOptions const& opts, FILE *f) { - return emit_yaml(t, id, f); + EmitterFile em(opts, f); + return em.emit_as(EMIT_JSON, t, id, /*error_on_excess*/true).len; } - -/** emit JSON to the given file. A null file defaults to stdout. - * Return the number of bytes written. */ -inline size_t emit_json(Tree const& t, size_t id, FILE *f) +/** (2) like (1), but use default emit options */ +inline size_t emit_json(Tree const& t, id_type id, FILE *f) { EmitterFile em(f); return em.emit_as(EMIT_JSON, t, id, /*error_on_excess*/true).len; } -/** emit YAML to the given file. A null file defaults to stdout. - * Return the number of bytes written. - * @overload */ +// emit from root ------------------------- + +/** (1) emit YAML to the given file, starting at the root node. A null file defaults to stdout. + * Return the number of bytes written. 
*/ +inline size_t emit_yaml(Tree const& t, EmitOptions const& opts, FILE *f=nullptr) +{ + EmitterFile em(opts, f); + return em.emit_as(EMIT_YAML, t, /*error_on_excess*/true).len; +} +/** (2) like (1), but use default emit options */ inline size_t emit_yaml(Tree const& t, FILE *f=nullptr) { EmitterFile em(f); return em.emit_as(EMIT_YAML, t, /*error_on_excess*/true).len; } -RYML_DEPRECATE_EMIT inline size_t emit(Tree const& t, FILE *f=nullptr) +/** (1) emit JSON to the given file. A null file defaults to stdout. + * Return the number of bytes written. */ +inline size_t emit_json(Tree const& t, EmitOptions const& opts, FILE *f=nullptr) { - return emit_yaml(t, f); + EmitterFile em(opts, f); + return em.emit_as(EMIT_JSON, t, /*error_on_excess*/true).len; } - -/** emit JSON to the given file. A null file defaults to stdout. - * Return the number of bytes written. - * @overload */ +/** (2) like (1), but use default emit options */ inline size_t emit_json(Tree const& t, FILE *f=nullptr) { EmitterFile em(f); @@ -22201,31 +25177,53 @@ inline size_t emit_json(Tree const& t, FILE *f=nullptr) } -/** emit YAML to the given file. A null file defaults to stdout. - * Return the number of bytes written. - * @overload */ +// emit from ConstNodeRef ------------------------ + +/** (1) emit YAML to the given file. A null file defaults to stdout. + * Return the number of bytes written. */ +inline size_t emit_yaml(ConstNodeRef const& r, EmitOptions const& opts, FILE *f=nullptr) +{ + if(!detail::is_set_(r)) + return {}; + EmitterFile em(opts, f); + return em.emit_as(EMIT_YAML, r, /*error_on_excess*/true).len; +} +/** (2) like (1), but use default emit options */ inline size_t emit_yaml(ConstNodeRef const& r, FILE *f=nullptr) { + if(!detail::is_set_(r)) + return {}; EmitterFile em(f); return em.emit_as(EMIT_YAML, r, /*error_on_excess*/true).len; } -RYML_DEPRECATE_EMIT inline size_t emit(ConstNodeRef const& r, FILE *f=nullptr) +/** (1) emit JSON to the given file. 
A null file defaults to stdout. + * Return the number of bytes written. */ +inline size_t emit_json(ConstNodeRef const& r, EmitOptions const& opts, FILE *f=nullptr) { - return emit_yaml(r, f); + if(!detail::is_set_(r)) + return {}; + EmitterFile em(opts, f); + return em.emit_as(EMIT_JSON, r, /*error_on_excess*/true).len; } - -/** emit JSON to the given file. A null file defaults to stdout. - * Return the number of bytes written. - * @overload */ +/** (2) like (1), but use default emit options */ inline size_t emit_json(ConstNodeRef const& r, FILE *f=nullptr) { + if(!detail::is_set_(r)) + return {}; EmitterFile em(f); return em.emit_as(EMIT_JSON, r, /*error_on_excess*/true).len; } +/** @} */ + //----------------------------------------------------------------------------- +/** @defgroup doc_emit_to_ostream Emit to an STL-like ostream + * + * @{ + */ + /** emit YAML to an STL-like ostream */ template inline OStream& operator<< (OStream& s, Tree const& t) @@ -22240,63 +25238,166 @@ inline OStream& operator<< (OStream& s, Tree const& t) template inline OStream& operator<< (OStream& s, ConstNodeRef const& n) { + if(!detail::is_set_(n)) + return s; EmitterOStream em(s); em.emit_as(EMIT_YAML, n); return s; } +/** mark a tree or node to be emitted as json when using @ref + * operator<<, with options. For example: + * + * ```cpp + * Tree t = parse_in_arena("{foo: bar}"); + * std::cout << t; // emits YAML + * std::cout << as_json(t); // emits JSON + * std::cout << as_json(t, EmitOptions().max_depth(10)); // emits JSON with a max tree depth + * ``` + * + * @see @ref operator<< */ +struct as_json +{ + Tree const* tree; + size_t node; + EmitOptions options; + as_json(Tree const& t, EmitOptions const& opts={}) : tree(&t), node(t.empty() ?
NONE : t.root_id()), options(opts) {} + as_json(Tree const& t, size_t id, EmitOptions const& opts={}) : tree(&t), node(id), options(opts) {} + as_json(ConstNodeRef const& n, EmitOptions const& opts={}) : tree(n.tree()), node(n.id()), options(opts) {} +}; + +/** mark a tree or node to be emitted as yaml when using @ref + * operator<<, with options. For example: + * + * ```cpp + * Tree t = parse_in_arena("{foo: bar}"); + * std::cout << t; // emits YAML + * std::cout << as_yaml(t); // emits YAML, same as above + * std::cout << as_yaml(t, EmitOptions().max_depth(10)); // emits YAML with a max tree depth + * ``` + * + * @see @ref operator<< */ +struct as_yaml +{ + Tree const* tree; + size_t node; + EmitOptions options; + as_yaml(Tree const& t, EmitOptions const& opts={}) : tree(&t), node(t.empty() ? NONE : t.root_id()), options(opts) {} + as_yaml(Tree const& t, size_t id, EmitOptions const& opts={}) : tree(&t), node(id), options(opts) {} + as_yaml(ConstNodeRef const& n, EmitOptions const& opts={}) : tree(n.tree()), node(n.id()), options(opts) {} +}; + /** emit json to an STL-like stream */ template inline OStream& operator<< (OStream& s, as_json const& j) { - EmitterOStream em(s); + if(!j.tree || j.node == NONE) + return s; + EmitterOStream em(j.options, s); em.emit_as(EMIT_JSON, *j.tree, j.node, true); return s; } +/** emit yaml to an STL-like stream */ +template +inline OStream& operator<< (OStream& s, as_yaml const& y) +{ + if(!y.tree || y.node == NONE) + return s; + EmitterOStream em(y.options, s); + em.emit_as(EMIT_YAML, *y.tree, y.node, true); + return s; +} + +/** @} */ + //----------------------------------------------------------------------------- +/** @defgroup doc_emit_to_buffer Emit to memory buffer + * + * @{ + */ + +// emit from tree and node id ----------------------- -/** emit YAML to the given buffer. Return a substr trimmed to the emitted YAML. +/** (1) emit YAML to the given buffer. Return a substr trimmed to the emitted YAML. + * @param t the tree to emit.
+ * @param id the node where to start emitting. + * @param opts emit options. + * @param buf the output buffer. + * @param opts emit options. * @param error_on_excess Raise an error if the space in the buffer is insufficient. - * @overload */ -inline substr emit_yaml(Tree const& t, size_t id, substr buf, bool error_on_excess=true) + * @return a substr trimmed to the result in the output buffer. If the buffer is + * insufficient (when error_on_excess is false), the string pointer of the + * result will be set to null, and the length will report the required buffer size. */ +inline substr emit_yaml(Tree const& t, id_type id, EmitOptions const& opts, substr buf, bool error_on_excess=true) { - EmitterBuf em(buf); + EmitterBuf em(opts, buf); return em.emit_as(EMIT_YAML, t, id, error_on_excess); } -RYML_DEPRECATE_EMIT inline substr emit(Tree const& t, size_t id, substr buf, bool error_on_excess=true) +/** (2) like (1), but use default emit options */ +inline substr emit_yaml(Tree const& t, id_type id, substr buf, bool error_on_excess=true) { - return emit_yaml(t, id, buf, error_on_excess); + EmitterBuf em(buf); + return em.emit_as(EMIT_YAML, t, id, error_on_excess); } - -/** emit JSON to the given buffer. Return a substr trimmed to the emitted JSON. +/** (1) emit JSON to the given buffer. Return a substr trimmed to the emitted JSON. + * @param t the tree to emit. + * @param id the node where to start emitting. + * @param opts emit options. + * @param buf the output buffer. + * @param opts emit options. * @param error_on_excess Raise an error if the space in the buffer is insufficient. - * @overload */ -inline substr emit_json(Tree const& t, size_t id, substr buf, bool error_on_excess=true) + * @return a substr trimmed to the result in the output buffer. If the buffer is + * insufficient (when error_on_excess is false), the string pointer of the + * result will be set to null, and the length will report the required buffer size. 
*/ +inline substr emit_json(Tree const& t, id_type id, EmitOptions const& opts, substr buf, bool error_on_excess=true) +{ + EmitterBuf em(opts, buf); + return em.emit_as(EMIT_JSON, t, id, error_on_excess); +} +/** (2) like (1), but use default emit options */ +inline substr emit_json(Tree const& t, id_type id, substr buf, bool error_on_excess=true) { EmitterBuf em(buf); return em.emit_as(EMIT_JSON, t, id, error_on_excess); } -/** emit YAML to the given buffer. Return a substr trimmed to the emitted YAML. +// emit from root ------------------------- + +/** (1) emit YAML to the given buffer. Return a substr trimmed to the emitted YAML. + * @param t the tree; will be emitted from the root node. + * @param buf the output buffer. * @param error_on_excess Raise an error if the space in the buffer is insufficient. - * @overload */ + * @return a substr trimmed to the result in the output buffer. If the buffer is + * insufficient (when error_on_excess is false), the string pointer of the + * result will be set to null, and the length will report the required buffer size. */ +inline substr emit_yaml(Tree const& t, EmitOptions const& opts, substr buf, bool error_on_excess=true) +{ + EmitterBuf em(opts, buf); + return em.emit_as(EMIT_YAML, t, error_on_excess); +} +/** (2) like (1), but use default emit options */ inline substr emit_yaml(Tree const& t, substr buf, bool error_on_excess=true) { EmitterBuf em(buf); return em.emit_as(EMIT_YAML, t, error_on_excess); } -RYML_DEPRECATE_EMIT inline substr emit(Tree const& t, substr buf, bool error_on_excess=true) +/** (1) emit JSON to the given buffer. Return a substr trimmed to the emitted JSON. + * @param t the tree; will be emitted from the root node. + * @param buf the output buffer. + * @param error_on_excess Raise an error if the space in the buffer is insufficient. + * @return a substr trimmed to the result in the output buffer. 
If the buffer is + * insufficient (when error_on_excess is false), the string pointer of the + * result will be set to null, and the length will report the required buffer size. */ +inline substr emit_json(Tree const& t, EmitOptions const& opts, substr buf, bool error_on_excess=true) { - return emit_yaml(t, buf, error_on_excess); + EmitterBuf em(opts, buf); + return em.emit_as(EMIT_JSON, t, error_on_excess); } - -/** emit JSON to the given buffer. Return a substr trimmed to the emitted JSON. - * @param error_on_excess Raise an error if the space in the buffer is insufficient. - * @overload */ +/** (2) like (1), but use default emit options */ inline substr emit_json(Tree const& t, substr buf, bool error_on_excess=true) { EmitterBuf em(buf); @@ -22304,26 +25405,51 @@ inline substr emit_json(Tree const& t, substr buf, bool error_on_excess=true) } -/** emit YAML to the given buffer. Return a substr trimmed to the emitted YAML. +// emit from ConstNodeRef ------------------------ + +/** (1) emit YAML to the given buffer. Return a substr trimmed to the emitted YAML. + * @param r the starting node. + * @param buf the output buffer. + * @param opts emit options. * @param error_on_excess Raise an error if the space in the buffer is insufficient. - * @overload - */ + * @return a substr trimmed to the result in the output buffer. If the buffer is + * insufficient (when error_on_excess is false), the string pointer of the + * result will be set to null, and the length will report the required buffer size. 
*/ +inline substr emit_yaml(ConstNodeRef const& r, EmitOptions const& opts, substr buf, bool error_on_excess=true) +{ + if(!detail::is_set_(r)) + return {}; + EmitterBuf em(opts, buf); + return em.emit_as(EMIT_YAML, r, error_on_excess); +} +/** (2) like (1), but use default emit options */ inline substr emit_yaml(ConstNodeRef const& r, substr buf, bool error_on_excess=true) { + if(!detail::is_set_(r)) + return {}; EmitterBuf em(buf); return em.emit_as(EMIT_YAML, r, error_on_excess); } -RYML_DEPRECATE_EMIT inline substr emit(ConstNodeRef const& r, substr buf, bool error_on_excess=true) +/** (1) emit JSON to the given buffer. Return a substr trimmed to the emitted JSON. + * @param r the starting node. + * @param buf the output buffer. + * @param opts emit options. + * @param error_on_excess Raise an error if the space in the buffer is insufficient. + * @return a substr trimmed to the result in the output buffer. If the buffer is + * insufficient (when error_on_excess is false), the string pointer of the + * result will be set to null, and the length will report the required buffer size. */ +inline substr emit_json(ConstNodeRef const& r, EmitOptions const& opts, substr buf, bool error_on_excess=true) { - return emit_yaml(r, buf, error_on_excess); + if(!detail::is_set_(r)) + return {}; + EmitterBuf em(opts, buf); + return em.emit_as(EMIT_JSON, r, error_on_excess); } - -/** emit JSON to the given buffer. Return a substr trimmed to the emitted JSON. - * @param error_on_excess Raise an error if the space in the buffer is insufficient. 
- * @overload - */ +/** (2) like (1), but use default emit options */ inline substr emit_json(ConstNodeRef const& r, substr buf, bool error_on_excess=true) { + if(!detail::is_set_(r)) + return {}; EmitterBuf em(buf); return em.emit_as(EMIT_JSON, r, error_on_excess); } @@ -22331,182 +25457,296 @@ inline substr emit_json(ConstNodeRef const& r, substr buf, bool error_on_excess= //----------------------------------------------------------------------------- -/** emit+resize: emit YAML to the given std::string/std::vector-like - * container, resizing it as needed to fit the emitted YAML. */ +/** @defgroup doc_emit_to_container Emit to resizeable container + * + * @{ + */ + +// emit from tree and node id --------------------------- + +/** (1) emit+resize: emit YAML to the given `std::string`/`std::vector`-like + * container, resizing it as needed to fit the emitted YAML. If @p append is + * set to true, the emitted YAML is appended at the end of the container. + * + * @return a substr trimmed to the emitted YAML (excluding the initial contents, when appending) */ template -substr emitrs_yaml(Tree const& t, size_t id, CharOwningContainer * cont) +substr emitrs_yaml(Tree const& t, id_type id, EmitOptions const& opts, CharOwningContainer * cont, bool append=false) { - substr buf = to_substr(*cont); - substr ret = emit_yaml(t, id, buf, /*error_on_excess*/false); + size_t startpos = append ? 
cont->size() : 0u; + cont->resize(cont->capacity()); // otherwise the first emit would be certain to fail + substr buf = to_substr(*cont).sub(startpos); + substr ret = emit_yaml(t, id, opts, buf, /*error_on_excess*/false); if(ret.str == nullptr && ret.len > 0) { - cont->resize(ret.len); - buf = to_substr(*cont); - ret = emit_yaml(t, id, buf, /*error_on_excess*/true); + cont->resize(startpos + ret.len); + buf = to_substr(*cont).sub(startpos); + ret = emit_yaml(t, id, opts, buf, /*error_on_excess*/true); + } + else + { + cont->resize(startpos + ret.len); } return ret; } +/** (2) like (1), but use default emit options */ template -RYML_DEPRECATE_EMITRS substr emitrs(Tree const& t, size_t id, CharOwningContainer * cont) +substr emitrs_yaml(Tree const& t, id_type id, CharOwningContainer * cont, bool append=false) { - return emitrs_yaml(t, id, cont); + return emitrs_yaml(t, id, EmitOptions{}, cont, append); } - -/** emit+resize: emit JSON to the given std::string/std::vector-like - * container, resizing it as needed to fit the emitted JSON. */ +/** (1) emit+resize: emit JSON to the given `std::string`/`std::vector`-like + * container, resizing it as needed to fit the emitted JSON. If @p append is + * set to true, the emitted YAML is appended at the end of the container. + * + * @return a substr trimmed to the emitted JSON (excluding the initial contents, when appending) */ template -substr emitrs_json(Tree const& t, size_t id, CharOwningContainer * cont) +substr emitrs_json(Tree const& t, id_type id, EmitOptions const& opts, CharOwningContainer * cont, bool append=false) { - substr buf = to_substr(*cont); - substr ret = emit_json(t, id, buf, /*error_on_excess*/false); + const size_t startpos = append ? 
cont->size() : 0u; + cont->resize(cont->capacity()); // otherwise the first emit would be certain to fail + substr buf = to_substr(*cont).sub(startpos); + EmitterBuf em(opts, buf); + substr ret = emit_json(t, id, opts, buf, /*error_on_excess*/false); if(ret.str == nullptr && ret.len > 0) { - cont->resize(ret.len); - buf = to_substr(*cont); - ret = emit_json(t, id, buf, /*error_on_excess*/true); + cont->resize(startpos + ret.len); + buf = to_substr(*cont).sub(startpos); + ret = emit_json(t, id, opts, buf, /*error_on_excess*/true); + } + else + { + cont->resize(startpos + ret.len); } return ret; } +/** (2) like (1), but use default emit options */ +template +substr emitrs_json(Tree const& t, id_type id, CharOwningContainer * cont, bool append=false) +{ + return emitrs_json(t, id, EmitOptions{}, cont, append); +} -/** emit+resize: emit YAML to the given std::string/std::vector-like - * container, resizing it as needed to fit the emitted YAML. */ +/** (3) emit+resize: YAML to a newly-created `std::string`/`std::vector`-like container. */ template -CharOwningContainer emitrs_yaml(Tree const& t, size_t id) +CharOwningContainer emitrs_yaml(Tree const& t, id_type id, EmitOptions const& opts={}) { CharOwningContainer c; - emitrs_yaml(t, id, &c); + emitrs_yaml(t, id, opts, &c); return c; } +/** (3) emit+resize: JSON to a newly-created `std::string`/`std::vector`-like container. */ template -RYML_DEPRECATE_EMITRS CharOwningContainer emitrs(Tree const& t, size_t id) +CharOwningContainer emitrs_json(Tree const& t, id_type id, EmitOptions const& opts={}) { CharOwningContainer c; - emitrs_yaml(t, id, &c); + emitrs_json(t, id, opts, &c); return c; } -/** emit+resize: emit JSON to the given std::string/std::vector-like - * container, resizing it as needed to fit the emitted JSON. */ + +// emit from root ------------------------- + +/** (1) emit+resize: YAML to the given `std::string`/`std::vector`-like + * container, resizing it as needed to fit the emitted YAML. 
+ * @return a substr trimmed to the new emitted contents. */ template -CharOwningContainer emitrs_json(Tree const& t, size_t id) +substr emitrs_yaml(Tree const& t, EmitOptions const& opts, CharOwningContainer * cont, bool append=false) { - CharOwningContainer c; - emitrs_json(t, id, &c); - return c; + if(t.empty()) + return {}; + return emitrs_yaml(t, t.root_id(), opts, cont, append); } - - -/** emit+resize: YAML to the given std::string/std::vector-like - * container, resizing it as needed to fit the emitted YAML. */ +/** (2) like (1), but use default emit options */ template -substr emitrs_yaml(Tree const& t, CharOwningContainer * cont) +substr emitrs_yaml(Tree const& t, CharOwningContainer * cont, bool append=false) { if(t.empty()) return {}; - return emitrs_yaml(t, t.root_id(), cont); + return emitrs_yaml(t, t.root_id(), EmitOptions{}, cont, append); } +/** (1) emit+resize: JSON to the given `std::string`/`std::vector`-like + * container, resizing it as needed to fit the emitted JSON. + * @return a substr trimmed to the new emitted contents. */ template -RYML_DEPRECATE_EMITRS substr emitrs(Tree const& t, CharOwningContainer * cont) +substr emitrs_json(Tree const& t, EmitOptions const& opts, CharOwningContainer * cont, bool append=false) { - return emitrs_yaml(t, cont); + if(t.empty()) + return {}; + return emitrs_json(t, t.root_id(), opts, cont, append); } - -/** emit+resize: JSON to the given std::string/std::vector-like - * container, resizing it as needed to fit the emitted JSON. */ +/** (2) like (1), but use default emit options */ template -substr emitrs_json(Tree const& t, CharOwningContainer * cont) +substr emitrs_json(Tree const& t, CharOwningContainer * cont, bool append=false) { if(t.empty()) return {}; - return emitrs_json(t, t.root_id(), cont); + return emitrs_json(t, t.root_id(), EmitOptions{}, cont, append); } -/** emit+resize: YAML to the given std::string/std::vector-like container, - * resizing it as needed to fit the emitted YAML. 
*/ +/** (3) emit+resize: YAML to a newly-created `std::string`/`std::vector`-like container. */ template -CharOwningContainer emitrs_yaml(Tree const& t) +CharOwningContainer emitrs_yaml(Tree const& t, EmitOptions const& opts={}) { CharOwningContainer c; if(t.empty()) return c; - emitrs_yaml(t, t.root_id(), &c); + emitrs_yaml(t, t.root_id(), opts, &c); return c; } +/** (3) emit+resize: JSON to a newly-created `std::string`/`std::vector`-like container. */ template -RYML_DEPRECATE_EMITRS CharOwningContainer emitrs(Tree const& t) -{ - return emitrs_yaml(t); -} - -/** emit+resize: JSON to the given std::string/std::vector-like container, - * resizing it as needed to fit the emitted JSON. */ -template -CharOwningContainer emitrs_json(Tree const& t) +CharOwningContainer emitrs_json(Tree const& t, EmitOptions const& opts={}) { CharOwningContainer c; if(t.empty()) return c; - emitrs_json(t, t.root_id(), &c); + emitrs_json(t, t.root_id(), opts, &c); return c; } -/** emit+resize: YAML to the given std::string/std::vector-like container, - * resizing it as needed to fit the emitted YAML. */ +// emit from ConstNodeRef ------------------------ + + +/** (1) emit+resize: YAML to the given `std::string`/`std::vector`-like container, + * resizing it as needed to fit the emitted YAML. 
+ * @return a substr trimmed to the new emitted contents */ template -substr emitrs_yaml(ConstNodeRef const& n, CharOwningContainer * cont) +substr emitrs_yaml(ConstNodeRef const& n, EmitOptions const& opts, CharOwningContainer * cont, bool append=false) { - _RYML_CB_CHECK(n.tree()->callbacks(), n.valid()); - return emitrs_yaml(*n.tree(), n.id(), cont); + if(!detail::is_set_(n)) + return {}; + _RYML_CB_CHECK(n.tree()->callbacks(), n.readable()); + return emitrs_yaml(*n.tree(), n.id(), opts, cont, append); } +/** (2) like (1), but use default emit options */ template -RYML_DEPRECATE_EMITRS substr emitrs(ConstNodeRef const& n, CharOwningContainer * cont) +substr emitrs_yaml(ConstNodeRef const& n, CharOwningContainer * cont, bool append=false) { - return emitrs_yaml(n, cont); + if(!detail::is_set_(n)) + return {}; + _RYML_CB_CHECK(n.tree()->callbacks(), n.readable()); + return emitrs_yaml(*n.tree(), n.id(), EmitOptions{}, cont, append); } - -/** emit+resize: JSON to the given std::string/std::vector-like container, - * resizing it as needed to fit the emitted JSON. */ +/** (1) emit+resize: JSON to the given `std::string`/`std::vector`-like container, + * resizing it as needed to fit the emitted JSON. 
+ * @return a substr trimmed to the new emitted contents */ +template +substr emitrs_json(ConstNodeRef const& n, EmitOptions const& opts, CharOwningContainer * cont, bool append=false) +{ + if(!detail::is_set_(n)) + return {}; + _RYML_CB_CHECK(n.tree()->callbacks(), n.readable()); + return emitrs_json(*n.tree(), n.id(), opts, cont, append); +} +/** (2) like (1), but use default emit options */ template -substr emitrs_json(ConstNodeRef const& n, CharOwningContainer * cont) +substr emitrs_json(ConstNodeRef const& n, CharOwningContainer * cont, bool append=false) { - _RYML_CB_CHECK(n.tree()->callbacks(), n.valid()); - return emitrs_json(*n.tree(), n.id(), cont); + if(!detail::is_set_(n)) + return {}; + _RYML_CB_CHECK(n.tree()->callbacks(), n.readable()); + return emitrs_json(*n.tree(), n.id(), EmitOptions{}, cont, append); } -/** emit+resize: YAML to the given std::string/std::vector-like container, - * resizing it as needed to fit the emitted YAML. */ +/** (3) emit+resize: YAML to a newly-created `std::string`/`std::vector`-like container. */ template -CharOwningContainer emitrs_yaml(ConstNodeRef const& n) +CharOwningContainer emitrs_yaml(ConstNodeRef const& n, EmitOptions const& opts={}) { - _RYML_CB_CHECK(n.tree()->callbacks(), n.valid()); + if(!detail::is_set_(n)) + return {}; + _RYML_CB_CHECK(n.tree()->callbacks(), n.readable()); CharOwningContainer c; - emitrs_yaml(*n.tree(), n.id(), &c); + emitrs_yaml(*n.tree(), n.id(), opts, &c); return c; } +/** (3) emit+resize: JSON to a newly-created `std::string`/`std::vector`-like container. 
*/ template -RYML_DEPRECATE_EMITRS CharOwningContainer emitrs(ConstNodeRef const& n) +CharOwningContainer emitrs_json(ConstNodeRef const& n, EmitOptions const& opts={}) { - return emitrs_yaml(n); + if(!detail::is_set_(n)) + return {}; + _RYML_CB_CHECK(n.tree()->callbacks(), n.readable()); + CharOwningContainer c; + emitrs_json(*n.tree(), n.id(), opts, &c); + return c; +} + + +/** @} */ + + +//----------------------------------------------------------------------------- + +/** @cond dev */ + +RYML_DEPRECATE_EMIT inline size_t emit(Tree const& t, id_type id, FILE *f) +{ + return emit_yaml(t, id, f); +} +RYML_DEPRECATE_EMIT inline size_t emit(Tree const& t, FILE *f=nullptr) +{ + return emit_yaml(t, f); +} +RYML_DEPRECATE_EMIT inline size_t emit(ConstNodeRef const& r, FILE *f=nullptr) +{ + return emit_yaml(r, f); +} + +RYML_DEPRECATE_EMIT inline substr emit(Tree const& t, id_type id, substr buf, bool error_on_excess=true) +{ + return emit_yaml(t, id, buf, error_on_excess); +} +RYML_DEPRECATE_EMIT inline substr emit(Tree const& t, substr buf, bool error_on_excess=true) +{ + return emit_yaml(t, buf, error_on_excess); +} +RYML_DEPRECATE_EMIT inline substr emit(ConstNodeRef const& r, substr buf, bool error_on_excess=true) +{ + return emit_yaml(r, buf, error_on_excess); } -/** emit+resize: JSON to the given std::string/std::vector-like container, - * resizing it as needed to fit the emitted JSON. 
*/ template -CharOwningContainer emitrs_json(ConstNodeRef const& n) +RYML_DEPRECATE_EMITRS substr emitrs(Tree const& t, id_type id, CharOwningContainer * cont) { - _RYML_CB_CHECK(n.tree()->callbacks(), n.valid()); - CharOwningContainer c; - emitrs_json(*n.tree(), n.id(), &c); - return c; + return emitrs_yaml(t, id, cont); +} +template +RYML_DEPRECATE_EMITRS CharOwningContainer emitrs(Tree const& t, id_type id) +{ + return emitrs_yaml(t, id); +} +template +RYML_DEPRECATE_EMITRS substr emitrs(Tree const& t, CharOwningContainer * cont) +{ + return emitrs_yaml(t, cont); +} +template +RYML_DEPRECATE_EMITRS CharOwningContainer emitrs(Tree const& t) +{ + return emitrs_yaml(t); +} +template +RYML_DEPRECATE_EMITRS substr emitrs(ConstNodeRef const& n, CharOwningContainer * cont) +{ + return emitrs_yaml(n, cont); +} +template +RYML_DEPRECATE_EMITRS CharOwningContainer emitrs(ConstNodeRef const& n) +{ + return emitrs_yaml(n); } +/** @endcond */ + } // namespace yml } // namespace c4 +C4_SUPPRESS_WARNING_GCC_CLANG_POP + #undef RYML_DEPRECATE_EMIT #undef RYML_DEPRECATE_EMITRS @@ -22545,57 +25785,57 @@ CharOwningContainer emitrs_json(ConstNodeRef const& n) #endif +/** @file emit.def.hpp Definitions for emit functions. 
*/ +#ifndef _C4_YML_DETAIL_PARSER_DBG_HPP_ +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/detail/parser_dbg.hpp +//#include "c4/yml/detail/parser_dbg.hpp" +#if !defined(C4_YML_DETAIL_PARSER_DBG_HPP_) && !defined(_C4_YML_DETAIL_PARSER_DBG_HPP_) +#error "amalgamate: file c4/yml/detail/parser_dbg.hpp must have been included at this point" +#endif /* C4_YML_DETAIL_PARSER_DBG_HPP_ */ + +#endif + namespace c4 { namespace yml { template -substr Emitter::emit_as(EmitType_e type, Tree const& t, size_t id, bool error_on_excess) +substr Emitter::emit_as(EmitType_e type, Tree const& t, id_type id, bool error_on_excess) { if(t.empty()) { _RYML_CB_ASSERT(t.callbacks(), id == NONE); return {}; } - _RYML_CB_CHECK(t.callbacks(), id < t.size()); + if(id == NONE) + id = t.root_id(); + _RYML_CB_CHECK(t.callbacks(), id < t.capacity()); m_tree = &t; + m_flow = false; if(type == EMIT_YAML) _emit_yaml(id); else if(type == EMIT_JSON) - _do_visit_json(id); + _do_visit_json(id, 0); else _RYML_CB_ERR(m_tree->callbacks(), "unknown emit type"); + m_tree = nullptr; return this->Writer::_get(error_on_excess); } -template -substr Emitter::emit_as(EmitType_e type, Tree const& t, bool error_on_excess) -{ - if(t.empty()) - return {}; - return this->emit_as(type, t, t.root_id(), error_on_excess); -} - -template -substr Emitter::emit_as(EmitType_e type, ConstNodeRef const& n, bool error_on_excess) -{ - _RYML_CB_CHECK(n.tree()->callbacks(), n.valid()); - return this->emit_as(type, *n.tree(), n.id(), error_on_excess); -} - //----------------------------------------------------------------------------- template -void Emitter::_emit_yaml(size_t id) +void Emitter::_emit_yaml(id_type id) { // save branches in the visitor by doing the initial stream/doc // logic here, sparing the need to check stream/val/keyval inside // the visitor functions - auto dispatch = [this](size_t node){ + auto dispatch = [this](id_type node){ NodeType ty = m_tree->type(node); - 
if(ty.marked_flow_sl()) + if(ty.is_flow_sl()) _do_visit_flow_sl(node, 0); - else if(ty.marked_flow_ml()) + else if(ty.is_flow_ml()) _do_visit_flow_ml(node, 0); else { @@ -22604,48 +25844,51 @@ void Emitter::_emit_yaml(size_t id) }; if(!m_tree->is_root(id)) { - if(m_tree->is_container(id) && !m_tree->type(id).marked_flow()) + if(m_tree->is_container(id) && !m_tree->type(id).is_flow()) { - size_t ilevel = 0; + id_type ilevel = 0; if(m_tree->has_key(id)) { this->Writer::_do_write(m_tree->key(id)); this->Writer::_do_write(":\n"); ++ilevel; } - _do_visit_block_container(id, ilevel, ilevel); + _do_visit_block_container(id, 0, ilevel, ilevel); return; } } - auto *btd = m_tree->tag_directives().b; - auto *etd = m_tree->tag_directives().e; - auto write_tag_directives = [&btd, etd, this](size_t next_node){ - auto end = btd; - while(end < etd) + TagDirectiveRange tagds = m_tree->tag_directives(); + auto write_tag_directives = [&tagds, this](const id_type next_node){ + TagDirective const* C4_RESTRICT end = tagds.b; + while(end < tagds.e) { if(end->next_node_id > next_node) break; ++end; } - for( ; btd != end; ++btd) + const id_type parent = m_tree->parent(next_node); + for( ; tagds.b != end; ++tagds.b) { - if(next_node != m_tree->first_child(m_tree->parent(next_node))) + if(next_node != m_tree->first_child(parent)) this->Writer::_do_write("...\n"); this->Writer::_do_write("%TAG "); - this->Writer::_do_write(btd->handle); + this->Writer::_do_write(tagds.b->handle); this->Writer::_do_write(' '); - this->Writer::_do_write(btd->prefix); + this->Writer::_do_write(tagds.b->prefix); this->Writer::_do_write('\n'); } }; if(m_tree->is_stream(id)) { - if(m_tree->first_child(id) != NONE) - write_tag_directives(m_tree->first_child(id)); - for(size_t child = m_tree->first_child(id); child != NONE; child = m_tree->next_sibling(child)) + const id_type first_child = m_tree->first_child(id); + if(first_child != NONE) + write_tag_directives(first_child); + for(id_type child = first_child; child 
!= NONE; child = m_tree->next_sibling(child)) { dispatch(child); + if(m_tree->is_doc(child) && m_tree->type(child).is_flow_sl()) + this->Writer::_do_write('\n'); if(m_tree->next_sibling(child) != NONE) write_tag_directives(m_tree->next_sibling(child)); } @@ -22665,14 +25908,14 @@ void Emitter::_emit_yaml(size_t id) _writek(id, 0); this->Writer::_do_write(": "); _writev(id, 0); - if(!m_tree->type(id).marked_flow()) + if(!m_tree->type(id).is_flow()) this->Writer::_do_write('\n'); } else if(m_tree->is_val(id)) { //this->Writer::_do_write("- "); _writev(id, 0); - if(!m_tree->type(id).marked_flow()) + if(!m_tree->type(id).is_flow()) this->Writer::_do_write('\n'); } else if(m_tree->type(id) == NOTYPE) @@ -22685,58 +25928,138 @@ void Emitter::_emit_yaml(size_t id) } } +#define _rymlindent_nextline() this->_indent(ilevel + 1); + template -void Emitter::_write_doc(size_t id) +void Emitter::_write_doc(id_type id) { RYML_ASSERT(m_tree->is_doc(id)); + RYML_ASSERT(!m_tree->has_key(id)); if(!m_tree->is_root(id)) { RYML_ASSERT(m_tree->is_stream(m_tree->parent(id))); this->Writer::_do_write("---"); } + // if(!m_tree->has_val(id)) // this is more frequent { - if(m_tree->has_val_tag(id)) + const bool tag = m_tree->has_val_tag(id); + const bool anchor = m_tree->has_val_anchor(id); + if(!tag && !anchor) + { + ; + } + else if(!tag && anchor) + { + if(!m_tree->is_root(id)) + this->Writer::_do_write(' '); + this->Writer::_do_write('&'); + this->Writer::_do_write(m_tree->val_anchor(id)); + #ifdef RYML_NO_COVERAGE__TO_BE_DELETED + if(m_tree->has_children(id) && m_tree->is_root(id)) + this->Writer::_do_write('\n'); + #endif + } + else if(tag && !anchor) { if(!m_tree->is_root(id)) this->Writer::_do_write(' '); _write_tag(m_tree->val_tag(id)); + #ifdef RYML_NO_COVERAGE__TO_BE_DELETED + if(m_tree->has_children(id) && m_tree->is_root(id)) + this->Writer::_do_write('\n'); + #endif } - if(m_tree->has_val_anchor(id)) + else // tag && anchor { if(!m_tree->is_root(id)) this->Writer::_do_write(' '); 
- this->Writer::_do_write('&'); + _write_tag(m_tree->val_tag(id)); + this->Writer::_do_write(" &"); this->Writer::_do_write(m_tree->val_anchor(id)); + #ifdef RYML_NO_COVERAGE__TO_BE_DELETED + if(m_tree->has_children(id) && m_tree->is_root(id)) + this->Writer::_do_write('\n'); + #endif } } else // docval { - RYML_ASSERT(m_tree->has_val(id)); - RYML_ASSERT(!m_tree->has_key(id)); - if(!m_tree->is_root(id)) - this->Writer::_do_write(' '); - _writev(id, 0); + _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->has_val(id)); + // some plain scalars such as '...' and '---' must not + // appear at 0-indentation + const csubstr val = m_tree->val(id); + const bool preceded_by_3_dashes = !m_tree->is_root(id); + const type_bits style_marks = m_tree->type(id) & (KEY_STYLE|VAL_STYLE); + const bool is_plain = m_tree->type(id).is_val_plain(); + const bool is_ambiguous = (is_plain || !style_marks) + && ((val.begins_with("...") || val.begins_with("---")) + || + (val.find('\n') != npos)); + if(preceded_by_3_dashes) + { + if(val.len == 0 && !m_tree->has_val_anchor(id) && !m_tree->has_val_tag(id)) + { + this->Writer::_do_write('\n'); + return; + } + else if(val.len && is_ambiguous) + { + this->Writer::_do_write('\n'); + } + else + { + this->Writer::_do_write(' '); + } + } + id_type ilevel = 0u; + if(is_ambiguous) + { + _rymlindent_nextline(); + ++ilevel; + } + _writev(id, ilevel); + if(val.len && m_tree->is_root(id)) + this->Writer::_do_write('\n'); } - this->Writer::_do_write('\n'); + if(!m_tree->is_root(id)) + this->Writer::_do_write('\n'); } template -void Emitter::_do_visit_flow_sl(size_t node, size_t ilevel) +void Emitter::_do_visit_flow_sl(id_type node, id_type depth, id_type ilevel) { - RYML_ASSERT(!m_tree->is_stream(node)); - RYML_ASSERT(m_tree->is_container(node) || m_tree->is_doc(node)); - RYML_ASSERT(m_tree->is_root(node) || (m_tree->parent_is_map(node) || m_tree->parent_is_seq(node))); + const bool prev_flow = m_flow; + m_flow = true; + _RYML_CB_ASSERT(m_tree->callbacks(), 
!m_tree->is_stream(node)); + _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->is_container(node) || m_tree->is_doc(node)); + _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->is_root(node) || (m_tree->parent_is_map(node) || m_tree->parent_is_seq(node))); + if(C4_UNLIKELY(depth > m_opts.max_depth())) + _RYML_CB_ERR(m_tree->callbacks(), "max depth exceeded"); if(m_tree->is_doc(node)) { _write_doc(node); + #ifdef RYML_NO_COVERAGE__TO_BE_DELETED if(!m_tree->has_children(node)) return; + else + #endif + { + if(m_tree->is_map(node)) + { + this->Writer::_do_write('{'); + } + else + { + _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->is_seq(node)); + this->Writer::_do_write('['); + } + } } else if(m_tree->is_container(node)) { - RYML_ASSERT(m_tree->is_map(node) || m_tree->is_seq(node)); + _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->is_map(node) || m_tree->is_seq(node)); bool spc = false; // write a space @@ -22778,7 +26101,7 @@ void Emitter::_do_visit_flow_sl(size_t node, size_t ilevel) } } // container - for(size_t child = m_tree->first_child(node), count = 0; child != NONE; child = m_tree->next_sibling(child)) + for(id_type child = m_tree->first_child(node), count = 0; child != NONE; child = m_tree->next_sibling(child)) { if(count++) this->Writer::_do_write(','); @@ -22795,7 +26118,7 @@ void Emitter::_do_visit_flow_sl(size_t node, size_t ilevel) else { // with single-line flow, we can never go back to block - _do_visit_flow_sl(child, ilevel + 1); + _do_visit_flow_sl(child, depth + 1, ilevel + 1); } } @@ -22807,110 +26130,118 @@ void Emitter::_do_visit_flow_sl(size_t node, size_t ilevel) { this->Writer::_do_write(']'); } + m_flow = prev_flow; } +C4_SUPPRESS_WARNING_MSVC_WITH_PUSH(4702) // unreachable error, triggered by flow_ml not implemented + template -void Emitter::_do_visit_flow_ml(size_t id, size_t ilevel, size_t do_indent) +void Emitter::_do_visit_flow_ml(id_type id, id_type depth, id_type ilevel, id_type do_indent) { C4_UNUSED(id); + C4_UNUSED(depth); C4_UNUSED(ilevel); 
C4_UNUSED(do_indent); - RYML_CHECK(false/*not implemented*/); + c4::yml::error("not implemented"); + #ifdef THIS_IS_A_WORK_IN_PROGRESS + if(C4_UNLIKELY(depth > m_opts.max_depth())) + _RYML_CB_ERR(m_tree->callbacks(), "max depth exceeded"); + const bool prev_flow = m_flow; + m_flow = true; + // do it... + m_flow = prev_flow; + #endif } template -void Emitter::_do_visit_block_container(size_t node, size_t next_level, size_t do_indent) +void Emitter::_do_visit_block_container(id_type node, id_type depth, id_type level, bool do_indent) { - RepC ind = indent_to(do_indent * next_level); - if(m_tree->is_seq(node)) { - for(size_t child = m_tree->first_child(node); child != NONE; child = m_tree->next_sibling(child)) + for(id_type child = m_tree->first_child(node); child != NONE; child = m_tree->next_sibling(child)) { _RYML_CB_ASSERT(m_tree->callbacks(), !m_tree->has_key(child)); if(m_tree->is_val(child)) { - this->Writer::_do_write(ind); + _indent(level, do_indent); this->Writer::_do_write("- "); - _writev(child, next_level); + _writev(child, level); this->Writer::_do_write('\n'); } else { _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->is_container(child)); NodeType ty = m_tree->type(child); - if(ty.marked_flow_sl()) + if(ty.is_flow_sl()) { - this->Writer::_do_write(ind); + _indent(level, do_indent); this->Writer::_do_write("- "); - _do_visit_flow_sl(child, 0u); + _do_visit_flow_sl(child, depth+1, 0u); this->Writer::_do_write('\n'); } - else if(ty.marked_flow_ml()) + else if(ty.is_flow_ml()) { - this->Writer::_do_write(ind); + _indent(level, do_indent); this->Writer::_do_write("- "); - _do_visit_flow_ml(child, next_level, do_indent); + _do_visit_flow_ml(child, depth+1, 0u, do_indent); this->Writer::_do_write('\n'); } else { - _do_visit_block(child, next_level, do_indent); + _do_visit_block(child, depth+1, level, do_indent); // same indentation level } } do_indent = true; - ind = indent_to(do_indent * next_level); } } else // map { _RYML_CB_ASSERT(m_tree->callbacks(), 
m_tree->is_map(node)); - for(size_t ich = m_tree->first_child(node); ich != NONE; ich = m_tree->next_sibling(ich)) + for(id_type ich = m_tree->first_child(node); ich != NONE; ich = m_tree->next_sibling(ich)) { _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->has_key(ich)); if(m_tree->is_keyval(ich)) { - this->Writer::_do_write(ind); - _writek(ich, next_level); + _indent(level, do_indent); + _writek(ich, level); this->Writer::_do_write(": "); - _writev(ich, next_level); + _writev(ich, level); this->Writer::_do_write('\n'); } else { _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->is_container(ich)); NodeType ty = m_tree->type(ich); - if(ty.marked_flow_sl()) + if(ty.is_flow_sl()) { - this->Writer::_do_write(ind); - _do_visit_flow_sl(ich, 0u); + _indent(level, do_indent); + _do_visit_flow_sl(ich, depth+1, 0u); this->Writer::_do_write('\n'); } - else if(ty.marked_flow_ml()) + else if(ty.is_flow_ml()) { - this->Writer::_do_write(ind); - _do_visit_flow_ml(ich, 0u); + _indent(level, do_indent); + _do_visit_flow_ml(ich, depth+1, 0u); this->Writer::_do_write('\n'); } else { - _do_visit_block(ich, next_level, do_indent); + _do_visit_block(ich, depth+1, level, do_indent); // same level! 
} - } + } // keyval vs container do_indent = true; - ind = indent_to(do_indent * next_level); - } - } + } // for children + } // seq vs map } template -void Emitter::_do_visit_block(size_t node, size_t ilevel, size_t do_indent) +void Emitter::_do_visit_block(id_type node, id_type depth, id_type ilevel, id_type do_indent) { - RYML_ASSERT(!m_tree->is_stream(node)); - RYML_ASSERT(m_tree->is_container(node) || m_tree->is_doc(node)); - RYML_ASSERT(m_tree->is_root(node) || (m_tree->parent_is_map(node) || m_tree->parent_is_seq(node))); - RepC ind = indent_to(do_indent * ilevel); - + _RYML_CB_ASSERT(m_tree->callbacks(), !m_tree->is_stream(node)); + _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->is_container(node) || m_tree->is_doc(node)); + _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->is_root(node) || (m_tree->parent_is_map(node) || m_tree->parent_is_seq(node))); + if(C4_UNLIKELY(depth > m_opts.max_depth())) + _RYML_CB_ERR(m_tree->callbacks(), "max depth exceeded"); if(m_tree->is_doc(node)) { _write_doc(node); @@ -22919,21 +26250,19 @@ void Emitter::_do_visit_block(size_t node, size_t ilevel, size_t do_inde } else if(m_tree->is_container(node)) { - RYML_ASSERT(m_tree->is_map(node) || m_tree->is_seq(node)); - + _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->is_map(node) || m_tree->is_seq(node)); bool spc = false; // write a space bool nl = false; // write a newline - if(m_tree->has_key(node)) { - this->Writer::_do_write(ind); + _indent(ilevel, do_indent); _writek(node, ilevel); this->Writer::_do_write(':'); spc = true; } else if(!m_tree->is_root(node)) { - this->Writer::_do_write(ind); + _indent(ilevel, do_indent); this->Writer::_do_write('-'); spc = true; } @@ -22985,17 +26314,22 @@ void Emitter::_do_visit_block(size_t node, size_t ilevel, size_t do_inde } } // container - size_t next_level = ilevel + 1; + id_type next_level = ilevel + 1; if(m_tree->is_root(node) || m_tree->is_doc(node)) next_level = ilevel; // do not indent at top level - _do_visit_block_container(node, 
next_level, do_indent); + _do_visit_block_container(node, depth, next_level, do_indent); } +C4_SUPPRESS_WARNING_MSVC_POP + + template -void Emitter::_do_visit_json(size_t id) +void Emitter::_do_visit_json(id_type id, id_type depth) { _RYML_CB_CHECK(m_tree->callbacks(), !m_tree->is_stream(id)); // JSON does not have streams + if(C4_UNLIKELY(depth > m_opts.max_depth())) + _RYML_CB_ERR(m_tree->callbacks(), "max depth exceeded"); if(m_tree->is_keyval(id)) { _writek_json(id); @@ -23019,11 +26353,11 @@ void Emitter::_do_visit_json(size_t id) this->Writer::_do_write('{'); } // container - for(size_t ich = m_tree->first_child(id); ich != NONE; ich = m_tree->next_sibling(ich)) + for(id_type ich = m_tree->first_child(id); ich != NONE; ich = m_tree->next_sibling(ich)) { if(ich != m_tree->first_child(id)) this->Writer::_do_write(','); - _do_visit_json(ich); + _do_visit_json(ich, depth+1); } if(m_tree->is_seq(id)) @@ -23033,7 +26367,7 @@ void Emitter::_do_visit_json(size_t id) } template -void Emitter::_write(NodeScalar const& C4_RESTRICT sc, NodeType flags, size_t ilevel) +void Emitter::_write(NodeScalar const& C4_RESTRICT sc, NodeType flags, id_type ilevel) { if( ! 
sc.tag.empty()) { @@ -23053,98 +26387,162 @@ void Emitter::_write(NodeScalar const& C4_RESTRICT sc, NodeType flags, s if(sc.anchor != "<<") this->Writer::_do_write('*'); this->Writer::_do_write(sc.anchor); + if(flags.is_key_ref()) + this->Writer::_do_write(' '); return; } // ensure the style flags only have one of KEY or VAL - _RYML_CB_ASSERT(m_tree->callbacks(), ((flags & (_WIP_KEY_STYLE|_WIP_VAL_STYLE)) == 0) || (((flags&_WIP_KEY_STYLE) == 0) != ((flags&_WIP_VAL_STYLE) == 0))); - - auto style_marks = flags & (_WIP_KEY_STYLE|_WIP_VAL_STYLE); - if(style_marks & (_WIP_KEY_LITERAL|_WIP_VAL_LITERAL)) + _RYML_CB_ASSERT(m_tree->callbacks(), ((flags & SCALAR_STYLE) == 0) || (((flags&KEY_STYLE) == 0) != ((flags&VAL_STYLE) == 0))); + type_bits style_marks = flags & SCALAR_STYLE; + if(!style_marks) + style_marks = scalar_style_choose(sc.scalar); + if(style_marks & (KEY_LITERAL|VAL_LITERAL)) { _write_scalar_literal(sc.scalar, ilevel, flags.has_key()); } - else if(style_marks & (_WIP_KEY_FOLDED|_WIP_VAL_FOLDED)) + else if(style_marks & (KEY_FOLDED|VAL_FOLDED)) { _write_scalar_folded(sc.scalar, ilevel, flags.has_key()); } - else if(style_marks & (_WIP_KEY_SQUO|_WIP_VAL_SQUO)) + else if(style_marks & (KEY_SQUO|VAL_SQUO)) { _write_scalar_squo(sc.scalar, ilevel); } - else if(style_marks & (_WIP_KEY_DQUO|_WIP_VAL_DQUO)) + else if(style_marks & (KEY_DQUO|VAL_DQUO)) { _write_scalar_dquo(sc.scalar, ilevel); } - else if(style_marks & (_WIP_KEY_PLAIN|_WIP_VAL_PLAIN)) - { - _write_scalar_plain(sc.scalar, ilevel); - } - else if(!style_marks) + else if(style_marks & (KEY_PLAIN|VAL_PLAIN)) { - size_t first_non_nl = sc.scalar.first_not_of('\n'); - bool all_newlines = first_non_nl == npos; - bool has_leading_ws = (!all_newlines) && sc.scalar.sub(first_non_nl).begins_with_any(" \t"); - bool do_literal = ((!sc.scalar.empty() && all_newlines) || (has_leading_ws && !sc.scalar.trim(' ').empty())); - if(do_literal) - { - _write_scalar_literal(sc.scalar, ilevel, flags.has_key(), 
/*explicit_indentation*/has_leading_ws); - } + if(C4_LIKELY(!(sc.scalar.begins_with(": ") || sc.scalar.begins_with(":\t")))) + _write_scalar_plain(sc.scalar, ilevel); else - { - for(size_t i = 0; i < sc.scalar.len; ++i) - { - if(sc.scalar.str[i] == '\n') - { - _write_scalar_literal(sc.scalar, ilevel, flags.has_key(), /*explicit_indentation*/has_leading_ws); - goto wrote_special; - } - // todo: check for escaped characters requiring double quotes - } - _write_scalar(sc.scalar, flags.is_quoted()); - wrote_special: - ; - } + _write_scalar_squo(sc.scalar, ilevel); } else { _RYML_CB_ERR(m_tree->callbacks(), "not implemented"); } } + template void Emitter::_write_json(NodeScalar const& C4_RESTRICT sc, NodeType flags) { - if(C4_UNLIKELY( ! sc.tag.empty())) - _RYML_CB_ERR(m_tree->callbacks(), "JSON does not have tags"); + if(flags & (KEYTAG|VALTAG)) + if(m_opts.json_error_flags() & EmitOptions::JSON_ERR_ON_TAG) + _RYML_CB_ERR(m_tree->callbacks(), "JSON does not have tags"); if(C4_UNLIKELY(flags.has_anchor())) - _RYML_CB_ERR(m_tree->callbacks(), "JSON does not have anchors"); - _write_scalar_json(sc.scalar, flags.has_key(), flags.is_quoted()); + if(m_opts.json_error_flags() & EmitOptions::JSON_ERR_ON_ANCHOR) + _RYML_CB_ERR(m_tree->callbacks(), "JSON does not have anchors"); + if(sc.scalar.len) + { + // use double quoted style... 
+ // if it is a key (mandatory in JSON) + // if the style is marked quoted + bool dquoted = ((flags & (KEY|VALQUO)) + || (scalar_style_json_choose(sc.scalar) & SCALAR_DQUO)); // choose the style + if(dquoted) + _write_scalar_json_dquo(sc.scalar); + else + this->Writer::_do_write(sc.scalar); + } + else + { + if(sc.scalar.str || (flags & (KEY|VALQUO|KEYTAG|VALTAG))) + this->Writer::_do_write("\"\""); + else + this->Writer::_do_write("null"); + } +} + +template +size_t Emitter::_write_escaped_newlines(csubstr s, size_t i) +{ + RYML_ASSERT(s.len > i); + RYML_ASSERT(s.str[i] == '\n'); + //_c4dbgpf("nl@i={} rem=[{}]~~~{}~~~", i, s.sub(i).len, s.sub(i)); + // add an extra newline for each sequence of consecutive + // newline/whitespace + this->Writer::_do_write('\n'); + do + { + this->Writer::_do_write('\n'); // write the newline again + ++i; // increase the outer loop counter! + } while(i < s.len && s.str[i] == '\n'); + _RYML_CB_ASSERT(m_tree->callbacks(), i > 0); + --i; + _RYML_CB_ASSERT(m_tree->callbacks(), s.str[i] == '\n'); + return i; +} + +inline bool _is_indented_block(csubstr s, size_t prev, size_t i) noexcept +{ + if(prev == 0 && s.begins_with_any(" \t")) + return true; + const size_t pos = s.first_not_of('\n', i); + return (pos != npos) && (s.str[pos] == ' ' || s.str[pos] == '\t'); } -#define _rymlindent_nextline() for(size_t lv = 0; lv < ilevel+1; ++lv) { this->Writer::_do_write(' '); this->Writer::_do_write(' '); } +template +size_t Emitter::_write_indented_block(csubstr s, size_t i, id_type ilevel) +{ + //_c4dbgpf("indblock@i={} rem=[{}]~~~\n{}~~~", i, s.sub(i).len, s.sub(i)); + _RYML_CB_ASSERT(m_tree->callbacks(), i > 0); + _RYML_CB_ASSERT(m_tree->callbacks(), s.str[i-1] == '\n'); + _RYML_CB_ASSERT(m_tree->callbacks(), i < s.len); + _RYML_CB_ASSERT(m_tree->callbacks(), s.str[i] == ' ' || s.str[i] == '\t' || s.str[i] == '\n'); +again: + size_t pos = s.find("\n ", i); + if(pos == npos) + pos = s.find("\n\t", i); + if(pos != npos) + { + ++pos; + 
//_c4dbgpf("indblock line@i={} rem=[{}]~~~\n{}~~~", i, s.range(i, pos).len, s.range(i, pos)); + _rymlindent_nextline(); + this->Writer::_do_write(s.range(i, pos)); + i = pos; + goto again; + } + // consume the newlines after the indented block + // to prevent them from being escaped + pos = s.find('\n', i); + if(pos != npos) + { + const size_t pos2 = s.first_not_of('\n', pos); + pos = (pos2 != npos) ? pos2 : pos; + //_c4dbgpf("indblock line@i={} rem=[{}]~~~\n{}~~~", i, s.range(i, pos).len, s.range(i, pos)); + _rymlindent_nextline(); + this->Writer::_do_write(s.range(i, pos)); + i = pos; + } + return i; +} template -void Emitter::_write_scalar_literal(csubstr s, size_t ilevel, bool explicit_key, bool explicit_indentation) +void Emitter::_write_scalar_literal(csubstr s, id_type ilevel, bool explicit_key) { + _RYML_CB_ASSERT(m_tree->callbacks(), s.find("\r") == csubstr::npos); if(explicit_key) this->Writer::_do_write("? "); - csubstr trimmed = s.trimr("\n\r"); - size_t numnewlines_at_end = s.len - trimmed.len - s.sub(trimmed.len).count('\r'); + csubstr trimmed = s.trimr('\n'); + const size_t numnewlines_at_end = s.len - trimmed.len; + const bool is_newline_only = (trimmed.len == 0 && (s.len > 0)); + const bool explicit_indentation = s.triml("\n\r").begins_with_any(" \t"); // - if(!explicit_indentation) - this->Writer::_do_write('|'); - else - this->Writer::_do_write("|2"); + this->Writer::_do_write('|'); + if(explicit_indentation) + this->Writer::_do_write('2'); // - if(numnewlines_at_end > 1 || (trimmed.len == 0 && s.len > 0)/*only newlines*/) - this->Writer::_do_write("+\n"); - else if(numnewlines_at_end == 1) - this->Writer::_do_write('\n'); - else - this->Writer::_do_write("-\n"); + if(numnewlines_at_end > 1 || is_newline_only) + this->Writer::_do_write('+'); + else if(numnewlines_at_end == 0) + this->Writer::_do_write('-'); // if(trimmed.len) { + this->Writer::_do_write('\n'); size_t pos = 0; // tracks the last character that was already written for(size_t i = 0; 
i < trimmed.len; ++i) { @@ -23161,81 +26559,93 @@ void Emitter::_write_scalar_literal(csubstr s, size_t ilevel, bool expli _rymlindent_nextline() this->Writer::_do_write(trimmed.sub(pos)); } - if(numnewlines_at_end) - { - this->Writer::_do_write('\n'); - --numnewlines_at_end; - } } - for(size_t i = 0; i < numnewlines_at_end; ++i) - { - _rymlindent_nextline() - if(i+1 < numnewlines_at_end || explicit_key) - this->Writer::_do_write('\n'); - } - if(explicit_key && !numnewlines_at_end) + for(size_t i = !is_newline_only; i < numnewlines_at_end; ++i) + this->Writer::_do_write('\n'); + if(explicit_key) this->Writer::_do_write('\n'); } template -void Emitter::_write_scalar_folded(csubstr s, size_t ilevel, bool explicit_key) +void Emitter::_write_scalar_folded(csubstr s, id_type ilevel, bool explicit_key) { if(explicit_key) - { this->Writer::_do_write("? "); - } - RYML_ASSERT(s.find("\r") == csubstr::npos); + _RYML_CB_ASSERT(m_tree->callbacks(), s.find("\r") == csubstr::npos); csubstr trimmed = s.trimr('\n'); - size_t numnewlines_at_end = s.len - trimmed.len; + const size_t numnewlines_at_end = s.len - trimmed.len; + const bool is_newline_only = (trimmed.len == 0 && (s.len > 0)); + const bool explicit_indentation = s.triml("\n\r").begins_with_any(" \t"); + // + this->Writer::_do_write('>'); + if(explicit_indentation) + this->Writer::_do_write('2'); + // if(numnewlines_at_end == 0) - { - this->Writer::_do_write(">-\n"); - } - else if(numnewlines_at_end == 1) - { - this->Writer::_do_write(">\n"); - } - else if(numnewlines_at_end > 1) - { - this->Writer::_do_write(">+\n"); - } + this->Writer::_do_write('-'); + else if(numnewlines_at_end > 1 || is_newline_only) + this->Writer::_do_write('+'); + // if(trimmed.len) { + this->Writer::_do_write('\n'); size_t pos = 0; // tracks the last character that was already written for(size_t i = 0; i < trimmed.len; ++i) { if(trimmed[i] != '\n') continue; - // write everything up to this point - csubstr since_pos = trimmed.range(pos, i+1); // 
include the newline - pos = i+1; // because of the newline - _rymlindent_nextline() - this->Writer::_do_write(since_pos); - this->Writer::_do_write('\n'); // write the newline twice + // escape newline sequences + if( ! _is_indented_block(s, pos, i)) + { + if(pos < i) + { + _rymlindent_nextline() + this->Writer::_do_write(s.range(pos, i)); + i = _write_escaped_newlines(s, i); + pos = i+1; + } + else + { + if(i+1 < s.len) + { + if(s.str[i+1] == '\n') + { + ++i; + i = _write_escaped_newlines(s, i); + pos = i+1; + } + else + { + this->Writer::_do_write('\n'); + pos = i+1; + } + } + } + } + else // do not escape newlines in indented blocks + { + ++i; + _rymlindent_nextline() + this->Writer::_do_write(s.range(pos, i)); + if(pos > 0 || !s.begins_with_any(" \t")) + i = _write_indented_block(s, i, ilevel); + pos = i; + } } if(pos < trimmed.len) { _rymlindent_nextline() this->Writer::_do_write(trimmed.sub(pos)); } - if(numnewlines_at_end) - { - this->Writer::_do_write('\n'); - --numnewlines_at_end; - } - } - for(size_t i = 0; i < numnewlines_at_end; ++i) - { - _rymlindent_nextline() - if(i+1 < numnewlines_at_end || explicit_key) - this->Writer::_do_write('\n'); } - if(explicit_key && !numnewlines_at_end) + for(size_t i = !is_newline_only; i < numnewlines_at_end; ++i) + this->Writer::_do_write('\n'); + if(explicit_key) this->Writer::_do_write('\n'); } template -void Emitter::_write_scalar_squo(csubstr s, size_t ilevel) +void Emitter::_write_scalar_squo(csubstr s, id_type ilevel) { size_t pos = 0; // tracks the last character that was already written this->Writer::_do_write('\''); @@ -23243,18 +26653,20 @@ void Emitter::_write_scalar_squo(csubstr s, size_t ilevel) { if(s[i] == '\n') { - csubstr sub = s.range(pos, i+1); - this->Writer::_do_write(sub); // write everything up to (including) this char - this->Writer::_do_write('\n'); // write the character again - if(i + 1 < s.len) - _rymlindent_nextline() // indent the next line + this->Writer::_do_write(s.range(pos, i)); // 
write everything up to (excluding) this char + //_c4dbgpf("newline at {}. writing ~~~{}~~~", i, s.range(pos, i)); + i = _write_escaped_newlines(s, i); + //_c4dbgpf("newline --> {}", i); + if(i < s.len) + _rymlindent_nextline() pos = i+1; } else if(s[i] == '\'') { csubstr sub = s.range(pos, i+1); - this->Writer::_do_write(sub); // write everything up to (including) this char - this->Writer::_do_write('\''); // write the character again + //_c4dbgpf("squote at {}. writing ~~~{}~~~", i, sub); + this->Writer::_do_write(sub); // write everything up to (including) this squote + this->Writer::_do_write('\''); // write the squote again pos = i+1; } } @@ -23265,233 +26677,184 @@ void Emitter::_write_scalar_squo(csubstr s, size_t ilevel) } template -void Emitter::_write_scalar_dquo(csubstr s, size_t ilevel) +void Emitter::_write_scalar_dquo(csubstr s, id_type ilevel) { size_t pos = 0; // tracks the last character that was already written this->Writer::_do_write('"'); for(size_t i = 0; i < s.len; ++i) { const char curr = s.str[i]; - if(curr == '"' || curr == '\\') + switch(curr) + { + case '"': + case '\\': { csubstr sub = s.range(pos, i); this->Writer::_do_write(sub); // write everything up to (excluding) this char this->Writer::_do_write('\\'); // write the escape this->Writer::_do_write(curr); // write the char pos = i+1; + break; } - else if(s[i] == '\n') +#ifndef prefer_writing_newlines_as_double_newlines + case '\n': { - csubstr sub = s.range(pos, i+1); - this->Writer::_do_write(sub); // write everything up to (including) this newline - this->Writer::_do_write('\n'); // write the newline again - if(i + 1 < s.len) - _rymlindent_nextline() // indent the next line + csubstr sub = s.range(pos, i); + this->Writer::_do_write(sub); // write everything up to (excluding) this char + this->Writer::_do_write("\\n"); // write the escape pos = i+1; - if(i+1 < s.len) // escape leading whitespace after the newline - { - const char next = s.str[i+1]; - if(next == ' ' || next == '\t') - 
this->Writer::_do_write('\\'); - } + (void)ilevel; + break; } - else if(curr == ' ' || curr == '\t') - { - // escape trailing whitespace before a newline - size_t next = s.first_not_of(" \t\r", i); - if(next != npos && s[next] == '\n') +#else + case '\n': + { + // write everything up to (excluding) this newline + //_c4dbgpf("nl@i={} rem=[{}]~~~{}~~~", i, s.sub(i).len, s.sub(i)); + this->Writer::_do_write(s.range(pos, i)); + i = _write_escaped_newlines(s, i); + ++i; + pos = i; + // as for the next line... + if(i < s.len) + { + _rymlindent_nextline() // indent the next line + // escape leading whitespace, and flush it + size_t first = s.first_not_of(" \t", i); + _c4dbgpf("@i={} first={} rem=[{}]~~~{}~~~", i, first, s.sub(i).len, s.sub(i)); + if(first > i) + { + if(first == npos) + first = s.len; + this->Writer::_do_write('\\'); + this->Writer::_do_write(s.range(i, first)); + this->Writer::_do_write('\\'); + i = first-1; + pos = first; + } + } + break; + } + // escape trailing whitespace before a newline + case ' ': + case '\t': + { + const size_t next = s.first_not_of(" \t\r", i); + if(next != npos && s.str[next] == '\n') { csubstr sub = s.range(pos, i); this->Writer::_do_write(sub); // write everything up to (excluding) this char this->Writer::_do_write('\\'); // escape the whitespace pos = i; } + break; } - else if(C4_UNLIKELY(curr == '\r')) +#endif + case '\r': { csubstr sub = s.range(pos, i); this->Writer::_do_write(sub); // write everything up to (excluding) this char this->Writer::_do_write("\\r"); // write the escaped char pos = i+1; + break; + } + case '\b': + { + csubstr sub = s.range(pos, i); + this->Writer::_do_write(sub); // write everything up to (excluding) this char + this->Writer::_do_write("\\b"); // write the escaped char + pos = i+1; + break; + } } } // write missing characters at the end of the string if(pos < s.len) - { - csubstr sub = s.sub(pos); - this->Writer::_do_write(sub); - } + this->Writer::_do_write(s.sub(pos)); 
this->Writer::_do_write('"'); } template -void Emitter::_write_scalar_plain(csubstr s, size_t ilevel) +void Emitter::_write_scalar_plain(csubstr s, id_type ilevel) { + if(C4_UNLIKELY(ilevel == 0 && (s.begins_with("...") || s.begins_with("---")))) + { + _rymlindent_nextline() // indent the next line + ++ilevel; + } size_t pos = 0; // tracks the last character that was already written for(size_t i = 0; i < s.len; ++i) { const char curr = s.str[i]; if(curr == '\n') { - csubstr sub = s.range(pos, i+1); + csubstr sub = s.range(pos, i); this->Writer::_do_write(sub); // write everything up to (including) this newline - this->Writer::_do_write('\n'); // write the newline again - if(i + 1 < s.len) - _rymlindent_nextline() // indent the next line + i = _write_escaped_newlines(s, i); pos = i+1; + if(pos < s.len) + _rymlindent_nextline() // indent the next line } } // write missing characters at the end of the string if(pos < s.len) - { - csubstr sub = s.sub(pos); - this->Writer::_do_write(sub); - } + this->Writer::_do_write(s.sub(pos)); } #undef _rymlindent_nextline template -void Emitter::_write_scalar(csubstr s, bool was_quoted) +void Emitter::_write_scalar_json_dquo(csubstr s) { - // this block of code needed to be moved to before the needs_quotes - // assignment to work around a g++ optimizer bug where (s.str != nullptr) - // was evaluated as true even if s.str was actually a nullptr (!!!) - if(s.len == size_t(0)) - { - if(was_quoted || s.str != nullptr) - this->Writer::_do_write("''"); - return; - } - - const bool needs_quotes = ( - was_quoted - || - ( - ( ! s.is_number()) - && - ( - // has leading whitespace - // looks like reference or anchor - // would be treated as a directive - // see https://www.yaml.info/learn/quote.html#noplain - s.begins_with_any(" \n\t\r*&%@`") - || - s.begins_with("<<") - || - // has trailing whitespace - s.ends_with_any(" \n\t\r") - || - // has special chars - (s.first_of("#:-?,\n{}[]'\"") != npos) - ) - ) - ); - - if( ! 
needs_quotes) - { - this->Writer::_do_write(s); - } - else + size_t pos = 0; + this->Writer::_do_write('"'); + for(size_t i = 0; i < s.len; ++i) { - const bool has_dquotes = s.first_of( '"') != npos; - const bool has_squotes = s.first_of('\'') != npos; - if(!has_squotes && has_dquotes) + switch(s.str[i]) { - this->Writer::_do_write('\''); - this->Writer::_do_write(s); - this->Writer::_do_write('\''); - } - else if(has_squotes && !has_dquotes) - { - RYML_ASSERT(s.count('\n') == 0); - this->Writer::_do_write('"'); - this->Writer::_do_write(s); - this->Writer::_do_write('"'); - } - else - { - _write_scalar_squo(s, /*FIXME FIXME FIXME*/0); + case '"': + this->Writer ::_do_write(s.range(pos, i)); + this->Writer ::_do_write("\\\""); + pos = i + 1; + break; + case '\n': + this->Writer ::_do_write(s.range(pos, i)); + this->Writer ::_do_write("\\n"); + pos = i + 1; + break; + case '\t': + this->Writer ::_do_write(s.range(pos, i)); + this->Writer ::_do_write("\\t"); + pos = i + 1; + break; + case '\\': + this->Writer ::_do_write(s.range(pos, i)); + this->Writer ::_do_write("\\\\"); + pos = i + 1; + break; + case '\r': + this->Writer ::_do_write(s.range(pos, i)); + this->Writer ::_do_write("\\r"); + pos = i + 1; + break; + case '\b': + this->Writer ::_do_write(s.range(pos, i)); + this->Writer ::_do_write("\\b"); + pos = i + 1; + break; + case '\f': + this->Writer ::_do_write(s.range(pos, i)); + this->Writer ::_do_write("\\f"); + pos = i + 1; + break; } } -} -template -void Emitter::_write_scalar_json(csubstr s, bool as_key, bool use_quotes) -{ - if((!use_quotes) - // json keys require quotes - && (!as_key) - && ( - // do not quote special cases - (s == "true" || s == "false" || s == "null") - || ( - // do not quote numbers - (s.is_number() - && ( - // quote integral numbers if they have a leading 0 - // https://github.com/biojppm/rapidyaml/issues/291 - (!(s.len > 1 && s.begins_with('0'))) - // do not quote reals with leading 0 - // 
https://github.com/biojppm/rapidyaml/issues/313 - || (s.find('.') != csubstr::npos) )) - ) - ) - ) - { - this->Writer::_do_write(s); - } - else + if(pos < s.len) { - size_t pos = 0; - this->Writer::_do_write('"'); - for(size_t i = 0; i < s.len; ++i) - { - switch(s.str[i]) - { - case '"': - this->Writer ::_do_write(s.range(pos, i)); - this->Writer ::_do_write("\\\""); - pos = i + 1; - break; - case '\n': - this->Writer ::_do_write(s.range(pos, i)); - this->Writer ::_do_write("\\n"); - pos = i + 1; - break; - case '\t': - this->Writer ::_do_write(s.range(pos, i)); - this->Writer ::_do_write("\\t"); - pos = i + 1; - break; - case '\\': - this->Writer ::_do_write(s.range(pos, i)); - this->Writer ::_do_write("\\\\"); - pos = i + 1; - break; - case '\r': - this->Writer ::_do_write(s.range(pos, i)); - this->Writer ::_do_write("\\r"); - pos = i + 1; - break; - case '\b': - this->Writer ::_do_write(s.range(pos, i)); - this->Writer ::_do_write("\\b"); - pos = i + 1; - break; - case '\f': - this->Writer ::_do_write(s.range(pos, i)); - this->Writer ::_do_write("\\f"); - pos = i + 1; - break; - } - } - if(pos < s.len) - { - csubstr sub = s.sub(pos); - this->Writer::_do_write(sub); - } - this->Writer::_do_write('"'); + csubstr sub = s.sub(pos); + this->Writer::_do_write(sub); } + this->Writer::_do_write('"'); } } // namespace yml @@ -23529,24 +26892,32 @@ void Emitter::_write_scalar_json(csubstr s, bool as_key, bool use_quotes namespace c4 { namespace yml { + +C4_SUPPRESS_WARNING_GCC_CLANG_WITH_PUSH("-Wold-style-cast") + namespace detail { -/** A lightweight contiguous stack with SSO. This avoids a dependency on std. */ -template +/** A lightweight contiguous stack with Small Storage + * Optimization. This is required because std::vector can throw + * exceptions, and we don't want to enforce any particular error + * mechanism. 
*/ +template class stack { static_assert(std::is_trivially_copyable::value, "T must be trivially copyable"); static_assert(std::is_trivially_destructible::value, "T must be trivially destructible"); - enum : size_t { sso_size = N }; +public: + + enum : id_type { sso_size = N }; public: - T m_buf[N]; - T * m_stack; - size_t m_size; - size_t m_capacity; - Callbacks m_callbacks; + T m_buf[size_t(N)]; + T *C4_RESTRICT m_stack; + id_type m_size; + id_type m_capacity; + Callbacks m_callbacks; public: @@ -23564,7 +26935,7 @@ class stack _free(); } - stack(stack const& that) noexcept : stack(that.m_callbacks) + stack(stack const& that) RYML_NOEXCEPT : stack(that.m_callbacks) { resize(that.m_size); _cp(&that); @@ -23575,7 +26946,7 @@ class stack _mv(&that); } - stack& operator= (stack const& that) noexcept + stack& operator= (stack const& that) RYML_NOEXCEPT { _cb(that.m_callbacks); resize(that.m_size); @@ -23592,29 +26963,29 @@ class stack public: - size_t size() const { return m_size; } - size_t empty() const { return m_size == 0; } - size_t capacity() const { return m_capacity; } + id_type size() const { return m_size; } + id_type empty() const { return m_size == 0; } + id_type capacity() const { return m_capacity; } void clear() { m_size = 0; } - void resize(size_t sz) + void resize(id_type sz) { reserve(sz); m_size = sz; } - void reserve(size_t sz); + void reserve(id_type sz); void push(T const& C4_RESTRICT n) { - RYML_ASSERT((const char*)&n + sizeof(T) < (const char*)m_stack || &n > m_stack + m_capacity); + _RYML_CB_ASSERT(m_callbacks, (const char*)&n + sizeof(T) < (const char*)m_stack || &n > m_stack + m_capacity); if(m_size == m_capacity) { - size_t cap = m_capacity == 0 ? N : 2 * m_capacity; + id_type cap = m_capacity == 0 ? N : 2 * m_capacity; reserve(cap); } m_stack[m_size] = n; @@ -23623,10 +26994,10 @@ class stack void push_top() { - RYML_ASSERT(m_size > 0); + _RYML_CB_ASSERT(m_callbacks, m_size > 0); if(m_size == m_capacity) { - size_t cap = m_capacity == 0 ? 
N : 2 * m_capacity; + id_type cap = m_capacity == 0 ? N : 2 * m_capacity; reserve(cap); } m_stack[m_size] = m_stack[m_size - 1]; @@ -23635,25 +27006,25 @@ class stack T const& C4_RESTRICT pop() { - RYML_ASSERT(m_size > 0); + _RYML_CB_ASSERT(m_callbacks, m_size > 0); --m_size; return m_stack[m_size]; } - C4_ALWAYS_INLINE T const& C4_RESTRICT top() const { RYML_ASSERT(m_size > 0); return m_stack[m_size - 1]; } - C4_ALWAYS_INLINE T & C4_RESTRICT top() { RYML_ASSERT(m_size > 0); return m_stack[m_size - 1]; } + C4_ALWAYS_INLINE T const& C4_RESTRICT top() const { _RYML_CB_ASSERT(m_callbacks, m_size > 0); return m_stack[m_size - 1]; } + C4_ALWAYS_INLINE T & C4_RESTRICT top() { _RYML_CB_ASSERT(m_callbacks, m_size > 0); return m_stack[m_size - 1]; } - C4_ALWAYS_INLINE T const& C4_RESTRICT bottom() const { RYML_ASSERT(m_size > 0); return m_stack[0]; } - C4_ALWAYS_INLINE T & C4_RESTRICT bottom() { RYML_ASSERT(m_size > 0); return m_stack[0]; } + C4_ALWAYS_INLINE T const& C4_RESTRICT bottom() const { _RYML_CB_ASSERT(m_callbacks, m_size > 0); return m_stack[0]; } + C4_ALWAYS_INLINE T & C4_RESTRICT bottom() { _RYML_CB_ASSERT(m_callbacks, m_size > 0); return m_stack[0]; } - C4_ALWAYS_INLINE T const& C4_RESTRICT top(size_t i) const { RYML_ASSERT(i < m_size); return m_stack[m_size - 1 - i]; } - C4_ALWAYS_INLINE T & C4_RESTRICT top(size_t i) { RYML_ASSERT(i < m_size); return m_stack[m_size - 1 - i]; } + C4_ALWAYS_INLINE T const& C4_RESTRICT top(id_type i) const { _RYML_CB_ASSERT(m_callbacks, i < m_size); return m_stack[m_size - 1 - i]; } + C4_ALWAYS_INLINE T & C4_RESTRICT top(id_type i) { _RYML_CB_ASSERT(m_callbacks, i < m_size); return m_stack[m_size - 1 - i]; } - C4_ALWAYS_INLINE T const& C4_RESTRICT bottom(size_t i) const { RYML_ASSERT(i < m_size); return m_stack[i]; } - C4_ALWAYS_INLINE T & C4_RESTRICT bottom(size_t i) { RYML_ASSERT(i < m_size); return m_stack[i]; } + C4_ALWAYS_INLINE T const& C4_RESTRICT bottom(id_type i) const { _RYML_CB_ASSERT(m_callbacks, i < m_size); return 
m_stack[i]; } + C4_ALWAYS_INLINE T & C4_RESTRICT bottom(id_type i) { _RYML_CB_ASSERT(m_callbacks, i < m_size); return m_stack[i]; } - C4_ALWAYS_INLINE T const& C4_RESTRICT operator[](size_t i) const { RYML_ASSERT(i < m_size); return m_stack[i]; } - C4_ALWAYS_INLINE T & C4_RESTRICT operator[](size_t i) { RYML_ASSERT(i < m_size); return m_stack[i]; } + C4_ALWAYS_INLINE T const& C4_RESTRICT operator[](id_type i) const { _RYML_CB_ASSERT(m_callbacks, i < m_size); return m_stack[i]; } + C4_ALWAYS_INLINE T & C4_RESTRICT operator[](id_type i) { _RYML_CB_ASSERT(m_callbacks, i < m_size); return m_stack[i]; } public: @@ -23667,10 +27038,12 @@ class stack const_iterator end () const { return (const_iterator)m_stack + m_size; } public: + void _free(); void _cp(stack const* C4_RESTRICT that); void _mv(stack * that); void _cb(Callbacks const& cb); + }; @@ -23678,8 +27051,8 @@ class stack //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- -template -void stack::reserve(size_t sz) +template +void stack::reserve(id_type sz) { if(sz <= m_size) return; @@ -23689,11 +27062,12 @@ void stack::reserve(size_t sz) m_capacity = N; return; } - T *buf = (T*) m_callbacks.m_allocate(sz * sizeof(T), m_stack, m_callbacks.m_user_data); - memcpy(buf, m_stack, m_size * sizeof(T)); + T *buf = (T*) m_callbacks.m_allocate((size_t)sz * sizeof(T), m_stack, m_callbacks.m_user_data); + _RYML_CB_ASSERT(m_callbacks, ((uintptr_t)buf % alignof(T)) == 0u); + memcpy(buf, m_stack, (size_t)m_size * sizeof(T)); if(m_stack != m_buf) { - m_callbacks.m_free(m_stack, m_capacity * sizeof(T), m_callbacks.m_user_data); + m_callbacks.m_free(m_stack, (size_t)m_capacity * sizeof(T), m_callbacks.m_user_data); } m_stack = buf; m_capacity = sz; @@ -23702,38 +27076,38 @@ void stack::reserve(size_t sz) //----------------------------------------------------------------------------- -template +template void stack::_free() { - 
RYML_ASSERT(m_stack != nullptr); // this structure cannot be memset() to zero + _RYML_CB_ASSERT(m_callbacks, m_stack != nullptr); // this structure cannot be memset() to zero if(m_stack != m_buf) { - m_callbacks.m_free(m_stack, m_capacity * sizeof(T), m_callbacks.m_user_data); + m_callbacks.m_free(m_stack, (size_t)m_capacity * sizeof(T), m_callbacks.m_user_data); m_stack = m_buf; m_size = N; m_capacity = N; } else { - RYML_ASSERT(m_capacity == N); + _RYML_CB_ASSERT(m_callbacks, m_capacity == N); } } //----------------------------------------------------------------------------- -template +template void stack::_cp(stack const* C4_RESTRICT that) { if(that->m_stack != that->m_buf) { - RYML_ASSERT(that->m_capacity > N); - RYML_ASSERT(that->m_size <= that->m_capacity); + _RYML_CB_ASSERT(m_callbacks, that->m_capacity > N); + _RYML_CB_ASSERT(m_callbacks, that->m_size <= that->m_capacity); } else { - RYML_ASSERT(that->m_capacity <= N); - RYML_ASSERT(that->m_size <= that->m_capacity); + _RYML_CB_ASSERT(m_callbacks, that->m_capacity <= N); + _RYML_CB_ASSERT(m_callbacks, that->m_size <= that->m_capacity); } memcpy(m_stack, that->m_stack, that->m_size * sizeof(T)); m_size = that->m_size; @@ -23744,19 +27118,19 @@ void stack::_cp(stack const* C4_RESTRICT that) //----------------------------------------------------------------------------- -template +template void stack::_mv(stack * that) { if(that->m_stack != that->m_buf) { - RYML_ASSERT(that->m_capacity > N); - RYML_ASSERT(that->m_size <= that->m_capacity); + _RYML_CB_ASSERT(m_callbacks, that->m_capacity > N); + _RYML_CB_ASSERT(m_callbacks, that->m_size <= that->m_capacity); m_stack = that->m_stack; } else { - RYML_ASSERT(that->m_capacity <= N); - RYML_ASSERT(that->m_size <= that->m_capacity); + _RYML_CB_ASSERT(m_callbacks, that->m_capacity <= N); + _RYML_CB_ASSERT(m_callbacks, that->m_size <= that->m_capacity); memcpy(m_buf, that->m_buf, that->m_size * sizeof(T)); m_stack = m_buf; } @@ -23764,7 +27138,7 @@ void 
stack::_mv(stack * that) m_capacity = that->m_capacity; m_callbacks = that->m_callbacks; // make sure no deallocation happens on destruction - RYML_ASSERT(that->m_stack != m_buf); + _RYML_CB_ASSERT(m_callbacks, that->m_stack != m_buf); that->m_stack = that->m_buf; that->m_capacity = N; that->m_size = 0; @@ -23773,7 +27147,7 @@ void stack::_mv(stack * that) //----------------------------------------------------------------------------- -template +template void stack::_cb(Callbacks const& cb) { if(cb != m_callbacks) @@ -23784,6 +27158,9 @@ void stack::_cb(Callbacks const& cb) } } // namespace detail + +C4_SUPPRESS_WARNING_GCC_CLANG_POP + } // namespace yml } // namespace c4 @@ -23796,9135 +27173,14708 @@ void stack::_cb(Callbacks const& cb) //******************************************************************************** //-------------------------------------------------------------------------------- -// src/c4/yml/parse.hpp -// https://github.com/biojppm/rapidyaml/src/c4/yml/parse.hpp +// src/c4/yml/filter_processor.hpp +// https://github.com/biojppm/rapidyaml/src/c4/yml/filter_processor.hpp //-------------------------------------------------------------------------------- //******************************************************************************** -#ifndef _C4_YML_PARSE_HPP_ -#define _C4_YML_PARSE_HPP_ +#ifndef _C4_YML_FILTER_PROCESSOR_HPP_ +#define _C4_YML_FILTER_PROCESSOR_HPP_ -#ifndef _C4_YML_TREE_HPP_ // amalgamate: removed include of -// https://github.com/biojppm/rapidyaml/src/c4/yml/tree.hpp -//#include "c4/yml/tree.hpp" -#if !defined(C4_YML_TREE_HPP_) && !defined(_C4_YML_TREE_HPP_) -#error "amalgamate: file c4/yml/tree.hpp must have been included at this point" -#endif /* C4_YML_TREE_HPP_ */ +// https://github.com/biojppm/rapidyaml/src/c4/yml/common.hpp +//#include "c4/yml/common.hpp" +#if !defined(C4_YML_COMMON_HPP_) && !defined(_C4_YML_COMMON_HPP_) +#error "amalgamate: file c4/yml/common.hpp must have been included at this point" +#endif /* 
C4_YML_COMMON_HPP_ */ -#endif -#ifndef _C4_YML_NODE_HPP_ +#ifdef RYML_DBG // amalgamate: removed include of -// https://github.com/biojppm/rapidyaml/src/c4/yml/node.hpp -//#include "c4/yml/node.hpp" -#if !defined(C4_YML_NODE_HPP_) && !defined(_C4_YML_NODE_HPP_) -#error "amalgamate: file c4/yml/node.hpp must have been included at this point" -#endif /* C4_YML_NODE_HPP_ */ - -#endif +// https://github.com/biojppm/rapidyaml/src/c4/charconv.hpp +//#include "c4/charconv.hpp" +#if !defined(C4_CHARCONV_HPP_) && !defined(_C4_CHARCONV_HPP_) +#error "amalgamate: file c4/charconv.hpp must have been included at this point" +#endif /* C4_CHARCONV_HPP_ */ -#ifndef _C4_YML_DETAIL_STACK_HPP_ // amalgamate: removed include of -// https://github.com/biojppm/rapidyaml/src/c4/yml/detail/stack.hpp -//#include "c4/yml/detail/stack.hpp" -#if !defined(C4_YML_DETAIL_STACK_HPP_) && !defined(_C4_YML_DETAIL_STACK_HPP_) -#error "amalgamate: file c4/yml/detail/stack.hpp must have been included at this point" -#endif /* C4_YML_DETAIL_STACK_HPP_ */ - -#endif - -//included above: -//#include +// https://github.com/biojppm/rapidyaml/src/c4/yml/detail/parser_dbg.hpp +//#include "c4/yml/detail/parser_dbg.hpp" +#if !defined(C4_YML_DETAIL_PARSER_DBG_HPP_) && !defined(_C4_YML_DETAIL_PARSER_DBG_HPP_) +#error "amalgamate: file c4/yml/detail/parser_dbg.hpp must have been included at this point" +#endif /* C4_YML_DETAIL_PARSER_DBG_HPP_ */ -#if defined(_MSC_VER) -# pragma warning(push) -# pragma warning(disable: 4251/*needs to have dll-interface to be used by clients of struct*/) #endif namespace c4 { namespace yml { -struct RYML_EXPORT ParserOptions -{ -private: - - typedef enum : uint32_t { - LOCATIONS = (1 << 0), - DEFAULTS = 0, - } Flags_e; +/** @defgroup doc_filter_processors Scalar filter processors + * + * These are internal classes used by @ref ParseEngine to parse the + * scalars; normally there is no reason for a user to be manually + * using these classes. 
+ * + * @ingroup doc_parse */ +/** @{ */ - uint32_t flags = DEFAULTS; -public: - ParserOptions() = default; +//----------------------------------------------------------------------------- - /** @name source location tracking */ - /** @{ */ +/** Filters an input string into a different output string */ +struct FilterProcessorSrcDst +{ + csubstr src; + substr dst; + size_t rpos; ///< read position + size_t wpos; ///< write position - /** enable/disable source location tracking */ - ParserOptions& locations(bool enabled) + C4_ALWAYS_INLINE FilterProcessorSrcDst(csubstr src_, substr dst_) noexcept + : src(src_) + , dst(dst_) + , rpos(0) + , wpos(0) { - if(enabled) - flags |= LOCATIONS; - else - flags &= ~LOCATIONS; - return *this; + RYML_ASSERT(!dst.overlaps(src)); } - bool locations() const { return (flags & LOCATIONS) != 0u; } - - /** @} */ -}; - - -//----------------------------------------------------------------------------- -//----------------------------------------------------------------------------- -//----------------------------------------------------------------------------- -class RYML_EXPORT Parser -{ -public: - - /** @name construction and assignment */ - /** @{ */ - Parser(Callbacks const& cb, ParserOptions opts={}); - Parser(ParserOptions opts={}) : Parser(get_callbacks(), opts) {} - ~Parser(); + C4_ALWAYS_INLINE void setwpos(size_t wpos_) noexcept { wpos = wpos_; } + C4_ALWAYS_INLINE void setpos(size_t rpos_, size_t wpos_) noexcept { rpos = rpos_; wpos = wpos_; } + C4_ALWAYS_INLINE void set_at_end() noexcept { skip(src.len - rpos); } - Parser(Parser &&); - Parser(Parser const&); - Parser& operator=(Parser &&); - Parser& operator=(Parser const&); + C4_ALWAYS_INLINE bool has_more_chars() const noexcept { return rpos < src.len; } + C4_ALWAYS_INLINE bool has_more_chars(size_t maxpos) const noexcept { RYML_ASSERT(maxpos <= src.len); return rpos < maxpos; } - /** @} */ + C4_ALWAYS_INLINE csubstr rem() const noexcept { return src.sub(rpos); } + 
C4_ALWAYS_INLINE csubstr sofar() const noexcept { return csubstr(dst.str, wpos <= dst.len ? wpos : dst.len); } + C4_ALWAYS_INLINE FilterResult result() const noexcept + { + FilterResult ret; + ret.str.str = wpos <= dst.len ? dst.str : nullptr; + ret.str.len = wpos; + return ret; + } -public: + C4_ALWAYS_INLINE char curr() const noexcept { RYML_ASSERT(rpos < src.len); return src[rpos]; } + C4_ALWAYS_INLINE char next() const noexcept { return rpos+1 < src.len ? src[rpos+1] : '\0'; } + C4_ALWAYS_INLINE bool skipped_chars() const noexcept { return wpos != rpos; } - /** @name modifiers */ - /** @{ */ + C4_ALWAYS_INLINE void skip() noexcept { ++rpos; } + C4_ALWAYS_INLINE void skip(size_t num) noexcept { rpos += num; } - /** Reserve a certain capacity for the parsing stack. - * This should be larger than the expected depth of the parsed - * YAML tree. - * - * The parsing stack is the only (potential) heap memory used by - * the parser. - * - * If the requested capacity is below the default - * stack size of 16, the memory is used directly in the parser - * object; otherwise it will be allocated from the heap. - * - * @note this reserves memory only for the parser itself; all the - * allocations for the parsed tree will go through the tree's - * allocator. - * - * @note the tree and the arena can (and should) also be reserved. */ - void reserve_stack(size_t capacity) + C4_ALWAYS_INLINE void set_at(size_t pos, char c) noexcept { - m_stack.reserve(capacity); + RYML_ASSERT(pos < wpos); + dst.str[pos] = c; } - - /** Reserve a certain capacity for the array used to track node - * locations in the source buffer. 
*/ - void reserve_locations(size_t num_source_lines) + C4_ALWAYS_INLINE void set(char c) noexcept { - _resize_locations(num_source_lines); + if(wpos < dst.len) + dst.str[wpos] = c; + ++wpos; + } + C4_ALWAYS_INLINE void set(char c, size_t num) noexcept + { + RYML_ASSERT(num > 0); + if(wpos + num <= dst.len) + memset(dst.str + wpos, c, num); + wpos += num; } - /** Reserve a certain capacity for the character arena used to - * filter scalars. */ - void reserve_filter_arena(size_t num_characters) + C4_ALWAYS_INLINE void copy() noexcept + { + RYML_ASSERT(rpos < src.len); + if(wpos < dst.len) + dst.str[wpos] = src.str[rpos]; + ++wpos; + ++rpos; + } + C4_ALWAYS_INLINE void copy(size_t num) noexcept { - _resize_filter_arena(num_characters); + RYML_ASSERT(num); + RYML_ASSERT(rpos+num <= src.len); + if(wpos + num <= dst.len) + memcpy(dst.str + wpos, src.str + rpos, num); + wpos += num; + rpos += num; } - /** @} */ + C4_ALWAYS_INLINE void translate_esc(char c) noexcept + { + if(wpos < dst.len) + dst.str[wpos] = c; + ++wpos; + rpos += 2; + } + C4_ALWAYS_INLINE void translate_esc_bulk(const char *C4_RESTRICT s, size_t nw, size_t nr) noexcept + { + RYML_ASSERT(nw > 0); + RYML_ASSERT(nr > 0); + RYML_ASSERT(rpos+nr <= src.len); + if(wpos+nw <= dst.len) + memcpy(dst.str + wpos, s, nw); + wpos += nw; + rpos += 1 + nr; + } + C4_ALWAYS_INLINE void translate_esc_extending(const char *C4_RESTRICT s, size_t nw, size_t nr) noexcept + { + translate_esc_bulk(s, nw, nr); + } +}; -public: - /** @name getters and modifiers */ - /** @{ */ +//----------------------------------------------------------------------------- +// filter in place - /** Get the current callbacks in the parser. */ - Callbacks callbacks() const { return m_stack.m_callbacks; } +// debugging scaffold +/** @cond dev */ +#if defined(RYML_DBG) && 0 +#define _c4dbgip(...) _c4dbgpf(__VA_ARGS__) +#else +#define _c4dbgip(...) +#endif +/** @endcond */ - /** Get the name of the latest file parsed by this object. 
*/ - csubstr filename() const { return m_file; } +/** Filters in place. While the result may be larger than the source, + * any extending happens only at the end of the string. Consequently, + * it's impossible for characters to be left unfiltered. + * + * @see FilterProcessorInplaceMidExtending */ +struct FilterProcessorInplaceEndExtending +{ + substr src; ///< the subject string + size_t wcap; ///< write capacity - the capacity of the subject string's buffer + size_t rpos; ///< read position + size_t wpos; ///< write position - /** Get the latest YAML buffer parsed by this object. */ - csubstr source() const { return m_buf; } + C4_ALWAYS_INLINE FilterProcessorInplaceEndExtending(substr src_, size_t wcap_) noexcept + : src(src_) + , wcap(wcap_) + , rpos(0) + , wpos(0) + { + RYML_ASSERT(wcap >= src.len); + } - size_t stack_capacity() const { return m_stack.capacity(); } - size_t locations_capacity() const { return m_newline_offsets_capacity; } - size_t filter_arena_capacity() const { return m_filter_arena.len; } + C4_ALWAYS_INLINE void setwpos(size_t wpos_) noexcept { wpos = wpos_; } + C4_ALWAYS_INLINE void setpos(size_t rpos_, size_t wpos_) noexcept { rpos = rpos_; wpos = wpos_; } + C4_ALWAYS_INLINE void set_at_end() noexcept { skip(src.len - rpos); } - ParserOptions const& options() const { return m_options; } + C4_ALWAYS_INLINE bool has_more_chars() const noexcept { return rpos < src.len; } + C4_ALWAYS_INLINE bool has_more_chars(size_t maxpos) const noexcept { RYML_ASSERT(maxpos <= src.len); return rpos < maxpos; } - /** @} */ + C4_ALWAYS_INLINE FilterResult result() const noexcept + { + _c4dbgip("inplace: wpos={} wcap={} small={}", wpos, wcap, wpos > rpos); + FilterResult ret; + ret.str.str = (wpos <= wcap) ? src.str : nullptr; + ret.str.len = wpos; + return ret; + } + C4_ALWAYS_INLINE csubstr sofar() const noexcept { return csubstr(src.str, wpos <= wcap ? 
wpos : wcap); } + C4_ALWAYS_INLINE csubstr rem() const noexcept { return src.sub(rpos); } -public: + C4_ALWAYS_INLINE char curr() const noexcept { RYML_ASSERT(rpos < src.len); return src[rpos]; } + C4_ALWAYS_INLINE char next() const noexcept { return rpos+1 < src.len ? src[rpos+1] : '\0'; } - /** @name parse_in_place */ - /** @{ */ + C4_ALWAYS_INLINE void skip() noexcept { ++rpos; } + C4_ALWAYS_INLINE void skip(size_t num) noexcept { rpos += num; } - /** Create a new tree and parse into its root. - * The tree is created with the callbacks currently in the parser. */ - Tree parse_in_place(csubstr filename, substr src) + void set_at(size_t pos, char c) noexcept { - Tree t(callbacks()); - t.reserve(_estimate_capacity(src)); - this->parse_in_place(filename, src, &t, t.root_id()); - return t; + RYML_ASSERT(pos < wpos); + const size_t save = wpos; + wpos = pos; + set(c); + wpos = save; } - - /** Parse into an existing tree, starting at its root node. - * The callbacks in the tree are kept, and used to allocate - * the tree members, if any allocation is required. */ - void parse_in_place(csubstr filename, substr src, Tree *t) + void set(char c) noexcept { - this->parse_in_place(filename, src, t, t->root_id()); + if(wpos < wcap) // respect write-capacity + src.str[wpos] = c; + ++wpos; } - - /** Parse into an existing node. - * The callbacks in the tree are kept, and used to allocate - * the tree members, if any allocation is required. */ - void parse_in_place(csubstr filename, substr src, Tree *t, size_t node_id); - // ^^^^^^^^^^^^^ this is the workhorse overload; everything else is syntactic candy - - /** Parse into an existing node. - * The callbacks in the tree are kept, and used to allocate - * the tree members, if any allocation is required. 
*/ - void parse_in_place(csubstr filename, substr src, NodeRef node) + void set(char c, size_t num) noexcept { - this->parse_in_place(filename, src, node.tree(), node.id()); + RYML_ASSERT(num); + if(wpos + num <= wcap) // respect write-capacity + memset(src.str + wpos, c, num); + wpos += num; } - RYML_DEPRECATED("use parse_in_place() instead") Tree parse(csubstr filename, substr src) { return parse_in_place(filename, src); } - RYML_DEPRECATED("use parse_in_place() instead") void parse(csubstr filename, substr src, Tree *t) { parse_in_place(filename, src, t); } - RYML_DEPRECATED("use parse_in_place() instead") void parse(csubstr filename, substr src, Tree *t, size_t node_id) { parse_in_place(filename, src, t, node_id); } - RYML_DEPRECATED("use parse_in_place() instead") void parse(csubstr filename, substr src, NodeRef node) { parse_in_place(filename, src, node); } - - /** @} */ - -public: - - /** @name parse_in_arena: copy the YAML source buffer to the - * tree's arena, then parse the copy in situ - * - * @note overloads receiving a substr YAML buffer are intentionally - * left undefined, such that calling parse_in_arena() with a substr - * will cause a linker error. This is to prevent an accidental - * copy of the source buffer to the tree's arena, because substr - * is implicitly convertible to csubstr. If you really intend to parse - * a mutable buffer in the tree's arena, convert it first to immutable - * by assigning the substr to a csubstr prior to calling parse_in_arena(). - * This is not needed for parse_in_place() because csubstr is not - * implicitly convertible to substr. */ - /** @{ */ - - // READ THE NOTE ABOVE! - #define RYML_DONT_PARSE_SUBSTR_IN_ARENA "Do not pass a (mutable) substr to parse_in_arena(); if you have a substr, it should be parsed in place. Consider using parse_in_place() instead, or convert the buffer to csubstr prior to calling. This function is deliberately left undefined and will cause a linker error." 
- RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) Tree parse_in_arena(csubstr filename, substr csrc); - RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(csubstr filename, substr csrc, Tree *t); - RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(csubstr filename, substr csrc, Tree *t, size_t node_id); - RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(csubstr filename, substr csrc, NodeRef node); - - /** Create a new tree and parse into its root. - * The immutable YAML source is first copied to the tree's arena, - * and parsed from there. - * The callbacks in the tree are kept, and used to allocate - * the tree members, if any allocation is required. */ - Tree parse_in_arena(csubstr filename, csubstr csrc) + void copy() noexcept { - Tree t(callbacks()); - substr src = t.copy_to_arena(csrc); - t.reserve(_estimate_capacity(csrc)); - this->parse_in_place(filename, src, &t, t.root_id()); - return t; + RYML_ASSERT(wpos <= rpos); + RYML_ASSERT(rpos < src.len); + if(wpos < wcap) // respect write-capacity + src.str[wpos] = src.str[rpos]; + ++rpos; + ++wpos; } - - /** Parse into an existing tree, starting at its root node. - * The immutable YAML source is first copied to the tree's arena, - * and parsed from there. - * The callbacks in the tree are kept, and used to allocate - * the tree members, if any allocation is required. */ - void parse_in_arena(csubstr filename, csubstr csrc, Tree *t) + void copy(size_t num) noexcept { - substr src = t->copy_to_arena(csrc); - this->parse_in_place(filename, src, t, t->root_id()); + RYML_ASSERT(num); + RYML_ASSERT(rpos+num <= src.len); + RYML_ASSERT(wpos <= rpos); + if(wpos + num <= wcap) // respect write-capacity + { + if(wpos + num <= rpos) // there is no overlap + memcpy(src.str + wpos, src.str + rpos, num); + else // there is overlap + memmove(src.str + wpos, src.str + rpos, num); + } + rpos += num; + wpos += num; } - /** Parse into a specific node in an existing tree. 
- * The immutable YAML source is first copied to the tree's arena, - * and parsed from there. - * The callbacks in the tree are kept, and used to allocate - * the tree members, if any allocation is required. */ - void parse_in_arena(csubstr filename, csubstr csrc, Tree *t, size_t node_id) + void translate_esc(char c) noexcept { - substr src = t->copy_to_arena(csrc); - this->parse_in_place(filename, src, t, node_id); + RYML_ASSERT(rpos + 2 <= src.len); + RYML_ASSERT(wpos <= rpos); + if(wpos < wcap) // respect write-capacity + src.str[wpos] = c; + rpos += 2; // add 1u to account for the escape character + ++wpos; } - /** Parse into a specific node in an existing tree. - * The immutable YAML source is first copied to the tree's arena, - * and parsed from there. - * The callbacks in the tree are kept, and used to allocate - * the tree members, if any allocation is required. */ - void parse_in_arena(csubstr filename, csubstr csrc, NodeRef node) + void translate_esc_bulk(const char *C4_RESTRICT s, size_t nw, size_t nr) noexcept { - substr src = node.tree()->copy_to_arena(csrc); - this->parse_in_place(filename, src, node.tree(), node.id()); + RYML_ASSERT(nw > 0); + RYML_ASSERT(nr > 0); + RYML_ASSERT(nw <= nr + 1u); + RYML_ASSERT(rpos+nr <= src.len); + RYML_ASSERT(wpos <= rpos); + const size_t wpos_next = wpos + nw; + const size_t rpos_next = rpos + nr + 1u; // add 1u to account for the escape character + RYML_ASSERT(wpos_next <= rpos_next); + if(wpos_next <= wcap) + memcpy(src.str + wpos, s, nw); + rpos = rpos_next; + wpos = wpos_next; } - RYML_DEPRECATED("use parse_in_arena() instead") Tree parse(csubstr filename, csubstr csrc) { return parse_in_arena(filename, csrc); } - RYML_DEPRECATED("use parse_in_arena() instead") void parse(csubstr filename, csubstr csrc, Tree *t) { parse_in_arena(filename, csrc, t); } - RYML_DEPRECATED("use parse_in_arena() instead") void parse(csubstr filename, csubstr csrc, Tree *t, size_t node_id) { parse_in_arena(filename, csrc, t, node_id); } 
- RYML_DEPRECATED("use parse_in_arena() instead") void parse(csubstr filename, csubstr csrc, NodeRef node) { parse_in_arena(filename, csrc, node); } - - /** @} */ - -public: - - /** @name locations */ - /** @{ */ - - /** Get the location of a node of the last tree to be parsed by this parser. */ - Location location(Tree const& tree, size_t node_id) const; - /** Get the location of a node of the last tree to be parsed by this parser. */ - Location location(ConstNodeRef node) const; - /** Get the string starting at a particular location, to the end - * of the parsed source buffer. */ - csubstr location_contents(Location const& loc) const; - /** Given a pointer to a buffer position, get the location. @p val - * must be pointing to somewhere in the source buffer that was - * last parsed by this object. */ - Location val_location(const char *val) const; - - /** @} */ - -private: - - typedef enum { - BLOCK_LITERAL, //!< keep newlines (|) - BLOCK_FOLD //!< replace newline with single space (>) - } BlockStyle_e; - - typedef enum { - CHOMP_CLIP, //!< single newline at end (default) - CHOMP_STRIP, //!< no newline at end (-) - CHOMP_KEEP //!< all newlines from end (+) - } BlockChomp_e; - -private: - - using flag_t = int; - - static size_t _estimate_capacity(csubstr src) { size_t c = _count_nlines(src); c = c >= 16 ? c : 16; return c; } - - void _reset(); - - bool _finished_file() const; - bool _finished_line() const; - - csubstr _peek_next_line(size_t pos=npos) const; - bool _advance_to_peeked(); - void _scan_line(); - - csubstr _slurp_doc_scalar(); - - /** - * @param [out] quoted - * Will only be written to if this method returns true. - * Will be set to true if the scanned scalar was quoted, by '', "", > or |. 
- */ - bool _scan_scalar_seq_blck(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted); - bool _scan_scalar_map_blck(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted); - bool _scan_scalar_seq_flow(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted); - bool _scan_scalar_map_flow(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted); - bool _scan_scalar_unk(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted); - - csubstr _scan_comment(); - csubstr _scan_squot_scalar(); - csubstr _scan_dquot_scalar(); - csubstr _scan_block(); - substr _scan_plain_scalar_blck(csubstr currscalar, csubstr peeked_line, size_t indentation); - substr _scan_plain_scalar_flow(csubstr currscalar, csubstr peeked_line); - substr _scan_complex_key(csubstr currscalar, csubstr peeked_line); - csubstr _scan_to_next_nonempty_line(size_t indentation); - csubstr _extend_scanned_scalar(csubstr currscalar); - - csubstr _filter_squot_scalar(const substr s); - csubstr _filter_dquot_scalar(substr s); - csubstr _filter_plain_scalar(substr s, size_t indentation); - csubstr _filter_block_scalar(substr s, BlockStyle_e style, BlockChomp_e chomp, size_t indentation); - template - bool _filter_nl(substr scalar, size_t *C4_RESTRICT pos, size_t *C4_RESTRICT filter_arena_pos, size_t indentation); - template - void _filter_ws(substr scalar, size_t *C4_RESTRICT pos, size_t *C4_RESTRICT filter_arena_pos); - bool _apply_chomp(substr buf, size_t *C4_RESTRICT pos, BlockChomp_e chomp); - - void _handle_finished_file(); - void _handle_line(); - - bool _handle_indentation(); - - bool _handle_unk(); - bool _handle_map_flow(); - bool _handle_map_blck(); - bool _handle_seq_flow(); - bool _handle_seq_blck(); - bool _handle_top(); - bool _handle_types(); - bool _handle_key_anchors_and_refs(); - bool _handle_val_anchors_and_refs(); - void _move_val_tag_to_key_tag(); - void _move_key_tag_to_val_tag(); - void _move_key_tag2_to_key_tag(); - void _move_val_anchor_to_key_anchor(); - void _move_key_anchor_to_val_anchor(); - 
- void _push_level(bool explicit_flow_chars = false); - void _pop_level(); - - void _start_unk(bool as_child=true); - - void _start_map(bool as_child=true); - void _start_map_unk(bool as_child); - void _stop_map(); - - void _start_seq(bool as_child=true); - void _stop_seq(); - - void _start_seqimap(); - void _stop_seqimap(); - - void _start_doc(bool as_child=true); - void _stop_doc(); - void _start_new_doc(csubstr rem); - void _end_stream(); - - NodeData* _append_val(csubstr val, flag_t quoted=false); - NodeData* _append_key_val(csubstr val, flag_t val_quoted=false); - bool _rval_dash_start_or_continue_seq(); - - void _store_scalar(csubstr s, flag_t is_quoted); - csubstr _consume_scalar(); - void _move_scalar_from_top(); - - inline NodeData* _append_val_null(const char *str) { _RYML_CB_ASSERT(m_stack.m_callbacks, str >= m_buf.begin() && str <= m_buf.end()); return _append_val({nullptr, size_t(0)}); } - inline NodeData* _append_key_val_null(const char *str) { _RYML_CB_ASSERT(m_stack.m_callbacks, str >= m_buf.begin() && str <= m_buf.end()); return _append_key_val({nullptr, size_t(0)}); } - inline void _store_scalar_null(const char *str) { _RYML_CB_ASSERT(m_stack.m_callbacks, str >= m_buf.begin() && str <= m_buf.end()); _store_scalar({nullptr, size_t(0)}, false); } - - void _set_indentation(size_t behind); - void _save_indentation(size_t behind=0); - bool _maybe_set_indentation_from_anchor_or_tag(); - - void _write_key_anchor(size_t node_id); - void _write_val_anchor(size_t node_id); + C4_ALWAYS_INLINE void translate_esc_extending(const char *C4_RESTRICT s, size_t nw, size_t nr) noexcept + { + translate_esc_bulk(s, nw, nr); + } +}; - void _handle_directive(csubstr directive); - void _skipchars(char c); - template - void _skipchars(const char (&chars)[N]); +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- 
+//----------------------------------------------------------------------------- -private: +/** Filters in place. The result may be larger than the source, and + * extending may happen anywhere. As a result some characters may be + * left unfiltered when there is no slack in the buffer and the + * write-position would overlap the read-position. Consequently, it's + * possible for characters to be left unfiltered. In YAML, this + * happens only with double-quoted strings, and only with a small + * number of escape sequences such as `\L` which is substituted by three + * bytes. These escape sequences cause a call to translate_esc_extending() + * which is the only entry point to this unfiltered situation. + * + * @see FilterProcessorInplaceEndExtending */ +struct FilterProcessorInplaceMidExtending +{ + substr src; ///< the subject string + size_t wcap; ///< write capacity - the capacity of the subject string's buffer + size_t rpos; ///< read position + size_t wpos; ///< write position + size_t maxcap; ///< the max capacity needed for filtering the string. This may be larger than the final string size. 
+ bool unfiltered_chars; ///< true once any characters were left unwritten (unfiltered) for lack of capacity - static size_t _count_nlines(csubstr src); + C4_ALWAYS_INLINE FilterProcessorInplaceMidExtending(substr src_, size_t wcap_) noexcept + : src(src_) + , wcap(wcap_) + , rpos(0) + , wpos(0) + , maxcap(src.len) + , unfiltered_chars(false) + { + RYML_ASSERT(wcap >= src.len); + } -private: + C4_ALWAYS_INLINE void setwpos(size_t wpos_) noexcept { wpos = wpos_; } + C4_ALWAYS_INLINE void setpos(size_t rpos_, size_t wpos_) noexcept { rpos = rpos_; wpos = wpos_; } + C4_ALWAYS_INLINE void set_at_end() noexcept { skip(src.len - rpos); } - typedef enum : flag_t { - RTOP = 0x01 << 0, ///< reading at top level - RUNK = 0x01 << 1, ///< reading an unknown: must determine whether scalar, map or seq - RMAP = 0x01 << 2, ///< reading a map - RSEQ = 0x01 << 3, ///< reading a seq - FLOW = 0x01 << 4, ///< reading is inside explicit flow chars: [] or {} - QMRK = 0x01 << 5, ///< reading an explicit key (`? key`) - RKEY = 0x01 << 6, ///< reading a scalar as key - RVAL = 0x01 << 7, ///< reading a scalar as val - RNXT = 0x01 << 8, ///< read next val or keyval - SSCL = 0x01 << 9, ///< there's a stored scalar - QSCL = 0x01 << 10, ///< stored scalar was quoted - RSET = 0x01 << 11, ///< the (implicit) map being read is a !!set. @see https://yaml.org/type/set.html - NDOC = 0x01 << 12, ///< no document mode. a document has ended and another has not started yet. - //! reading an implicit map nested in an explicit seq. - //! eg, {key: [key2: value2, key3: value3]} - //! 
is parsed as {key: [{key2: value2}, {key3: value3}]} - RSEQIMAP = 0x01 << 13, - } State_e; + C4_ALWAYS_INLINE bool has_more_chars() const noexcept { return rpos < src.len; } + C4_ALWAYS_INLINE bool has_more_chars(size_t maxpos) const noexcept { RYML_ASSERT(maxpos <= src.len); return rpos < maxpos; } - struct LineContents + C4_ALWAYS_INLINE FilterResultExtending result() const noexcept { - csubstr full; ///< the full line, including newlines on the right - csubstr stripped; ///< the stripped line, excluding newlines on the right - csubstr rem; ///< the stripped line remainder; initially starts at the first non-space character - size_t indentation; ///< the number of spaces on the beginning of the line + _c4dbgip("inplace: wpos={} wcap={} unfiltered={} maxcap={}", this->wpos, this->wcap, this->unfiltered_chars, this->maxcap); + FilterResultExtending ret; + ret.str.str = (wpos <= wcap && !unfiltered_chars) ? src.str : nullptr; + ret.str.len = wpos; + ret.reqlen = maxcap; + return ret; + } + C4_ALWAYS_INLINE csubstr sofar() const noexcept { return csubstr(src.str, wpos <= wcap ? wpos : wcap); } + C4_ALWAYS_INLINE csubstr rem() const noexcept { return src.sub(rpos); } - LineContents() : full(), stripped(), rem(), indentation() {} + C4_ALWAYS_INLINE char curr() const noexcept { RYML_ASSERT(rpos < src.len); return src[rpos]; } + C4_ALWAYS_INLINE char next() const noexcept { return rpos+1 < src.len ? 
src[rpos+1] : '\0'; } - void reset_with_next_line(csubstr buf, size_t pos); + C4_ALWAYS_INLINE void skip() noexcept { ++rpos; } + C4_ALWAYS_INLINE void skip(size_t num) noexcept { rpos += num; } - void reset(csubstr full_, csubstr stripped_) + void set_at(size_t pos, char c) noexcept + { + RYML_ASSERT(pos < wpos); + const size_t save = wpos; + wpos = pos; + set(c); + wpos = save; + } + void set(char c) noexcept + { + if(wpos < wcap) // respect write-capacity { - full = full_; - stripped = stripped_; - rem = stripped_; - // find the first column where the character is not a space - indentation = full.first_not_of(' '); + if((wpos <= rpos) && !unfiltered_chars) + src.str[wpos] = c; } - - size_t current_col() const + else { - return current_col(rem); + _c4dbgip("inplace: add unwritten {}->{} maxcap={}->{}!", unfiltered_chars, true, maxcap, (wpos+1u > maxcap ? wpos+1u : maxcap)); + unfiltered_chars = true; } - - size_t current_col(csubstr s) const + ++wpos; + maxcap = wpos > maxcap ? wpos : maxcap; + } + void set(char c, size_t num) noexcept + { + RYML_ASSERT(num); + if(wpos + num <= wcap) // respect write-capacity { - RYML_ASSERT(s.str >= full.str); - RYML_ASSERT(full.is_super(s)); - size_t col = static_cast(s.str - full.str); - return col; + if((wpos <= rpos) && !unfiltered_chars) + memset(src.str + wpos, c, num); } - }; + else + { + _c4dbgip("inplace: add unwritten {}->{} maxcap={}->{}!", unfiltered_chars, true, maxcap, (wpos+num > maxcap ? wpos+num : maxcap)); + unfiltered_chars = true; + } + wpos += num; + maxcap = wpos > maxcap ? 
wpos : maxcap; + } - struct State + void copy() noexcept { - flag_t flags; - size_t level; - size_t node_id; // don't hold a pointer to the node as it will be relocated during tree resizes - csubstr scalar; - size_t scalar_col; // the column where the scalar (or its quotes) begin - - Location pos; - LineContents line_contents; - size_t indref; - - State() : flags(), level(), node_id(), scalar(), scalar_col(), pos(), line_contents(), indref() {} - - void reset(const char *file, size_t node_id_) + RYML_ASSERT(rpos < src.len); + if(wpos < wcap) // respect write-capacity { - flags = RUNK|RTOP; - level = 0; - pos.name = to_csubstr(file); - pos.offset = 0; - pos.line = 1; - pos.col = 1; - node_id = node_id_; - scalar_col = 0; - scalar.clear(); - indref = 0; + if((wpos < rpos) && !unfiltered_chars) // write only if wpos is behind rpos + src.str[wpos] = src.str[rpos]; } - }; - - void _line_progressed(size_t ahead); - void _line_ended(); - void _line_ended_undo(); - - void _prepare_pop() + else + { + _c4dbgip("inplace: add unwritten {}->{} (wpos={}!=rpos={})={} (wpos={}{}!", unfiltered_chars, true, wpos, rpos, wpos!=rpos, wpos, wcap, wpos maxcap ? wpos+1u : maxcap)); + unfiltered_chars = true; + } + ++rpos; + ++wpos; + maxcap = wpos > maxcap ? 
wpos : maxcap; + } + void copy(size_t num) noexcept { - RYML_ASSERT(m_stack.size() > 1); - State const& curr = m_stack.top(); - State & next = m_stack.top(1); - next.pos = curr.pos; - next.line_contents = curr.line_contents; - next.scalar = curr.scalar; + RYML_ASSERT(num); + RYML_ASSERT(rpos+num <= src.len); + if(wpos + num <= wcap) // respect write-capacity + { + if((wpos < rpos) && !unfiltered_chars) // write only if wpos is behind rpos + { + if(wpos + num <= rpos) // there is no overlap + memcpy(src.str + wpos, src.str + rpos, num); + else // there is overlap + memmove(src.str + wpos, src.str + rpos, num); + } + } + else + { + _c4dbgip("inplace: add unwritten {}->{} (wpos={}!=rpos={})={} (wpos={}{}!", unfiltered_chars, true, wpos, rpos, wpos!=rpos, wpos, wcap, wpos maxcap ? wpos : maxcap; } - inline bool _at_line_begin() const + void translate_esc(char c) noexcept { - return m_state->line_contents.rem.begin() == m_state->line_contents.full.begin(); + RYML_ASSERT(rpos + 2 <= src.len); + if(wpos < wcap) // respect write-capacity + { + if((wpos <= rpos) && !unfiltered_chars) + src.str[wpos] = c; + } + else + { + _c4dbgip("inplace: add unfiltered {}->{} maxcap={}->{}!", unfiltered_chars, true, maxcap, (wpos+1u > maxcap ? wpos+1u : maxcap)); + unfiltered_chars = true; + } + rpos += 2; + ++wpos; + maxcap = wpos > maxcap ? 
wpos : maxcap; } - inline bool _at_line_end() const + + C4_NO_INLINE void translate_esc_bulk(const char *C4_RESTRICT s, size_t nw, size_t nr) noexcept { - csubstr r = m_state->line_contents.rem; - return r.empty() || r.begins_with(' ', r.len); + RYML_ASSERT(nw > 0); + RYML_ASSERT(nr > 0); + RYML_ASSERT(nr+1u >= nw); + const size_t wpos_next = wpos + nw; + const size_t rpos_next = rpos + nr + 1u; // add 1u to account for the escape character + if(wpos_next <= wcap) // respect write-capacity + { + if((wpos <= rpos) && !unfiltered_chars) // write only if wpos is behind rpos + memcpy(src.str + wpos, s, nw); + } + else + { + _c4dbgip("inplace: add unwritten {}->{} (wpos={}!=rpos={})={} (wpos={}{}!", unfiltered_chars, true, wpos, rpos, wpos!=rpos, wpos, wcap, wpos maxcap ? wpos : maxcap; } - inline bool _token_is_from_this_line(csubstr token) const + + C4_NO_INLINE void translate_esc_extending(const char *C4_RESTRICT s, size_t nw, size_t nr) noexcept { - return token.is_sub(m_state->line_contents.full); + RYML_ASSERT(nw > 0); + RYML_ASSERT(nr > 0); + RYML_ASSERT(rpos+nr <= src.len); + const size_t wpos_next = wpos + nw; + const size_t rpos_next = rpos + nr + 1u; // add 1u to account for the escape character + if(wpos_next <= rpos_next) // read and write do not overlap. just do a vanilla copy. + { + if((wpos_next <= wcap) && !unfiltered_chars) + memcpy(src.str + wpos, s, nw); + rpos = rpos_next; + wpos = wpos_next; + maxcap = wpos > maxcap ? wpos : maxcap; + } + else // there is overlap. move the (to-be-read) string to the right. 
+ { + const size_t excess = wpos_next - rpos_next; + RYML_ASSERT(wpos_next > rpos_next); + if(src.len + excess <= wcap) // ensure we do not go past the end + { + RYML_ASSERT(rpos+nr+excess <= src.len); + if(wpos_next <= wcap) + { + if(!unfiltered_chars) + { + memmove(src.str + wpos_next, src.str + rpos_next, src.len - rpos_next); + memcpy(src.str + wpos, s, nw); + } + rpos = wpos_next; // wpos, not rpos + } + else + { + rpos = rpos_next; + //const size_t unw = nw > (nr + 1u) ? nw - (nr + 1u) : 0; + _c4dbgip("inplace: add unfiltered {}->{} maxcap={}->{}!", unfiltered_chars, true); + unfiltered_chars = true; + } + wpos = wpos_next; + // extend the string up to capacity + src.len += excess; + maxcap = wpos > maxcap ? wpos : maxcap; + } + else + { + //const size_t unw = nw > (nr + 1u) ? nw - (nr + 1u) : 0; + RYML_ASSERT(rpos_next <= src.len); + const size_t required_size = wpos_next + (src.len - rpos_next); + _c4dbgip("inplace: add unfiltered {}->{} maxcap={}->{}!", unfiltered_chars, true, maxcap, required_size > maxcap ? required_size : maxcap); + RYML_ASSERT(required_size > wcap); + unfiltered_chars = true; + maxcap = required_size > maxcap ? 
required_size : maxcap; + wpos = wpos_next; + rpos = rpos_next; + } + } } +}; - inline NodeData * node(State const* s) const { return m_tree->get(s->node_id); } - inline NodeData * node(State const& s) const { return m_tree->get(s .node_id); } - inline NodeData * node(size_t node_id) const { return m_tree->get( node_id); } +#undef _c4dbgip - inline bool has_all(flag_t f) const { return (m_state->flags & f) == f; } - inline bool has_any(flag_t f) const { return (m_state->flags & f) != 0; } - inline bool has_none(flag_t f) const { return (m_state->flags & f) == 0; } - static inline bool has_all(flag_t f, State const* s) { return (s->flags & f) == f; } - static inline bool has_any(flag_t f, State const* s) { return (s->flags & f) != 0; } - static inline bool has_none(flag_t f, State const* s) { return (s->flags & f) == 0; } +/** @} */ - inline void set_flags(flag_t f) { set_flags(f, m_state); } - inline void add_flags(flag_t on) { add_flags(on, m_state); } - inline void addrem_flags(flag_t on, flag_t off) { addrem_flags(on, off, m_state); } - inline void rem_flags(flag_t off) { rem_flags(off, m_state); } +} // namespace yml +} // namespace c4 - void set_flags(flag_t f, State * s); - void add_flags(flag_t on, State * s); - void addrem_flags(flag_t on, flag_t off, State * s); - void rem_flags(flag_t off, State * s); +#endif /* _C4_YML_FILTER_PROCESSOR_HPP_ */ - void _resize_filter_arena(size_t num_characters); - void _grow_filter_arena(size_t num_characters); - substr _finish_filter_arena(substr dst, size_t pos); - void _prepare_locations(); - void _resize_locations(size_t sz); - bool _locations_dirty() const; +// (end https://github.com/biojppm/rapidyaml/src/c4/yml/filter_processor.hpp) - bool _location_from_cont(Tree const& tree, size_t node, Location *C4_RESTRICT loc) const; - bool _location_from_node(Tree const& tree, size_t node, Location *C4_RESTRICT loc, size_t level) const; -private: - void _free(); - void _clr(); - void _cp(Parser const* that); - void 
_mv(Parser *that); +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/yml/parser_state.hpp +// https://github.com/biojppm/rapidyaml/src/c4/yml/parser_state.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4_YML_PARSER_STATE_HPP_ +#define _C4_YML_PARSER_STATE_HPP_ + +#ifndef _C4_YML_COMMON_HPP_ +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/common.hpp +//#include "c4/yml/common.hpp" +#if !defined(C4_YML_COMMON_HPP_) && !defined(_C4_YML_COMMON_HPP_) +#error "amalgamate: file c4/yml/common.hpp must have been included at this point" +#endif /* C4_YML_COMMON_HPP_ */ -#ifdef RYML_DBG - template void _dbg(csubstr fmt, Args const& C4_RESTRICT ...args) const; #endif - template void _err(csubstr fmt, Args const& C4_RESTRICT ...args) const; - template void _fmt_msg(DumpFn &&dumpfn) const; - static csubstr _prfl(substr buf, flag_t v); -private: +namespace c4 { +namespace yml { - ParserOptions m_options; +/** data type for @ref ParserState_e */ +using ParserFlag_t = int; + +/** Enumeration of the state flags for the parser */ +typedef enum : ParserFlag_t { + RTOP = 0x01 << 0, ///< reading at top level + RUNK = 0x01 << 1, ///< reading unknown state (when starting): must determine whether scalar, map or seq + RMAP = 0x01 << 2, ///< reading a map + RSEQ = 0x01 << 3, ///< reading a seq + FLOW = 0x01 << 4, ///< reading is inside explicit flow chars: [] or {} + BLCK = 0x01 << 5, ///< reading in block mode + QMRK = 0x01 << 6, ///< reading an explicit key (`? 
key`) + RKEY = 0x01 << 7, ///< reading a scalar as key + RVAL = 0x01 << 9, ///< reading a scalar as val + RKCL = 0x01 << 8, ///< reading the key colon (ie the : after the key in the map) + RNXT = 0x01 << 10, ///< read next val or keyval + SSCL = 0x01 << 11, ///< there's a stored scalar + QSCL = 0x01 << 12, ///< stored scalar was quoted + RSET = 0x01 << 13, ///< the (implicit) map being read is a !!set. @see https://yaml.org/type/set.html + RDOC = 0x01 << 14, ///< reading a document + NDOC = 0x01 << 15, ///< no document mode. a document has ended and another has not started yet. + USTY = 0x01 << 16, ///< reading in unknown style mode - must determine FLOW or BLCK + //! reading an implicit map nested in an explicit seq. + //! eg, {key: [key2: value2, key3: value3]} + //! is parsed as {key: [{key2: value2}, {key3: value3}]} + RSEQIMAP = 0x01 << 17, +} ParserState_e; - csubstr m_file; - substr m_buf; +#ifdef RYML_DBG +/** @cond dev */ +namespace detail { +csubstr _parser_flags_to_str(substr buf, ParserFlag_t flags); +} // namespace +/** @endcond */ +#endif - size_t m_root_id; - Tree * m_tree; - detail::stack m_stack; - State * m_state; +/** Helper to control the line contents while parsing a buffer */ +struct LineContents +{ + substr rem; ///< the stripped line remainder; initially starts at the first non-space character + size_t indentation; ///< the number of spaces on the beginning of the line + substr full; ///< the full line, including newlines on the right + substr stripped; ///< the stripped line, excluding newlines on the right - size_t m_key_tag_indentation; - size_t m_key_tag2_indentation; - csubstr m_key_tag; - csubstr m_key_tag2; - size_t m_val_tag_indentation; - csubstr m_val_tag; + LineContents() = default; - bool m_key_anchor_was_before; - size_t m_key_anchor_indentation; - csubstr m_key_anchor; - size_t m_val_anchor_indentation; - csubstr m_val_anchor; + void reset_with_next_line(substr buf, size_t offset) + { + RYML_ASSERT(offset <= buf.len); + size_t 
e = offset; + // get the current line stripped of newline chars + while(e < buf.len && (buf.str[e] != '\n' && buf.str[e] != '\r')) + ++e; + RYML_ASSERT(e >= offset); + const substr stripped_ = buf.range(offset, e); + // advance pos to include the first line ending + if(e < buf.len && buf.str[e] == '\r') + ++e; + if(e < buf.len && buf.str[e] == '\n') + ++e; + const substr full_ = buf.range(offset, e); + reset(full_, stripped_); + } - substr m_filter_arena; + void reset(substr full_, substr stripped_) + { + rem = stripped_; + indentation = stripped_.first_not_of(' '); // find the first column where the character is not a space + full = full_; + stripped = stripped_; + } - size_t *m_newline_offsets; - size_t m_newline_offsets_size; - size_t m_newline_offsets_capacity; - csubstr m_newline_offsets_buf; + C4_ALWAYS_INLINE size_t current_col() const RYML_NOEXCEPT + { + // WARNING: gcc x86 release builds were wrong (eg returning 0 + // when the result should be 4 ) when this function was like + // this: + // + //return current_col(rem); + // + // (see below for the full definition of the called overload + // of current_col()) + // + // ... so we explicitly inline the code in here: + RYML_ASSERT(rem.str >= full.str); + size_t col = static_cast(rem.str - full.str); + return col; + // + // this was happening only on builds specifically with (gcc + // AND x86 AND release); no other builds were having the + // problem: not in debug, not in x64, not in other + // architectures, not in clang, not in visual studio. WTF!? + // + // Enabling debug prints with RYML_DBG made the problem go + // away, so these could not be used to debug the + // problem. Adding prints inside the called current_col() also + // made the problem go away! WTF!??? + // + // a prize will be offered to anybody able to explain why this + // was happening. 
+ } + + C4_ALWAYS_INLINE size_t current_col(csubstr s) const RYML_NOEXCEPT + { + RYML_ASSERT(s.str >= full.str); + RYML_ASSERT(full.is_super(s)); + size_t col = static_cast(s.str - full.str); + return col; + } }; +static_assert(std::is_standard_layout::value, "LineContents not standard"); //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- -/** @name parse_in_place - * - * @desc parse a mutable YAML source buffer. - * - * @note These freestanding functions use a temporary parser object, - * and are convenience functions to easily parse YAML without the need - * to instantiate a separate parser. Note that some properties - * (notably node locations in the original source code) are only - * available through the parser object after it has parsed the - * code. If you need access to any of these properties, use - * Parser::parse_in_place() */ -/** @{ */ +struct ParserState +{ + LineContents line_contents; + Location pos; + ParserFlag_t flags; + size_t indref; ///< the reference indentation in the current block scope + id_type level; + id_type node_id; ///< don't hold a pointer to the node as it will be relocated during tree resizes + size_t scalar_col; // the column where the scalar (or its quotes) begin + bool more_indented; + bool has_children; -inline Tree parse_in_place( substr yaml ) { Parser np; return np.parse_in_place({} , yaml); } //!< parse in-situ a modifiable YAML source buffer. -inline Tree parse_in_place(csubstr filename, substr yaml ) { Parser np; return np.parse_in_place(filename, yaml); } //!< parse in-situ a modifiable YAML source buffer, providing a filename for error messages. 
-inline void parse_in_place( substr yaml, Tree *t ) { Parser np; np.parse_in_place({} , yaml, t); } //!< reusing the YAML tree, parse in-situ a modifiable YAML source buffer -inline void parse_in_place(csubstr filename, substr yaml, Tree *t ) { Parser np; np.parse_in_place(filename, yaml, t); } //!< reusing the YAML tree, parse in-situ a modifiable YAML source buffer, providing a filename for error messages. -inline void parse_in_place( substr yaml, Tree *t, size_t node_id) { Parser np; np.parse_in_place({} , yaml, t, node_id); } //!< reusing the YAML tree, parse in-situ a modifiable YAML source buffer -inline void parse_in_place(csubstr filename, substr yaml, Tree *t, size_t node_id) { Parser np; np.parse_in_place(filename, yaml, t, node_id); } //!< reusing the YAML tree, parse in-situ a modifiable YAML source buffer, providing a filename for error messages. -inline void parse_in_place( substr yaml, NodeRef node ) { Parser np; np.parse_in_place({} , yaml, node); } //!< reusing the YAML tree, parse in-situ a modifiable YAML source buffer -inline void parse_in_place(csubstr filename, substr yaml, NodeRef node ) { Parser np; np.parse_in_place(filename, yaml, node); } //!< reusing the YAML tree, parse in-situ a modifiable YAML source buffer, providing a filename for error messages. 
- -RYML_DEPRECATED("use parse_in_place() instead") inline Tree parse( substr yaml ) { Parser np; return np.parse_in_place({} , yaml); } -RYML_DEPRECATED("use parse_in_place() instead") inline Tree parse(csubstr filename, substr yaml ) { Parser np; return np.parse_in_place(filename, yaml); } -RYML_DEPRECATED("use parse_in_place() instead") inline void parse( substr yaml, Tree *t ) { Parser np; np.parse_in_place({} , yaml, t); } -RYML_DEPRECATED("use parse_in_place() instead") inline void parse(csubstr filename, substr yaml, Tree *t ) { Parser np; np.parse_in_place(filename, yaml, t); } -RYML_DEPRECATED("use parse_in_place() instead") inline void parse( substr yaml, Tree *t, size_t node_id) { Parser np; np.parse_in_place({} , yaml, t, node_id); } -RYML_DEPRECATED("use parse_in_place() instead") inline void parse(csubstr filename, substr yaml, Tree *t, size_t node_id) { Parser np; np.parse_in_place(filename, yaml, t, node_id); } -RYML_DEPRECATED("use parse_in_place() instead") inline void parse( substr yaml, NodeRef node ) { Parser np; np.parse_in_place({} , yaml, node); } -RYML_DEPRECATED("use parse_in_place() instead") inline void parse(csubstr filename, substr yaml, NodeRef node ) { Parser np; np.parse_in_place(filename, yaml, node); } + ParserState() = default; -/** @} */ + void start_parse(const char *file, id_type node_id_) + { + level = 0; + pos.name = to_csubstr(file); + pos.offset = 0; + pos.line = 1; + pos.col = 1; + node_id = node_id_; + more_indented = false; + scalar_col = 0; + indref = 0; + has_children = false; + } + void reset_after_push() + { + node_id = NONE; + indref = npos; + more_indented = false; + ++level; + has_children = false; + } -//----------------------------------------------------------------------------- + C4_ALWAYS_INLINE void reset_before_pop(ParserState const& to_pop) + { + pos = to_pop.pos; + line_contents = to_pop.line_contents; + } -/** @name parse_in_arena - * @desc parse a read-only YAML source buffer, copying it first to the 
tree's arena. - * - * @note These freestanding functions use a temporary parser object, - * and are convenience functions to easily parse YAML without the need - * to instantiate a separate parser. Note that some properties - * (notably node locations in the original source code) are only - * available through the parser object after it has parsed the - * code. If you need access to any of these properties, use - * Parser::parse_in_arena(). - * - * @note overloads receiving a substr YAML buffer are intentionally - * left undefined, such that calling parse_in_arena() with a substr - * will cause a linker error. This is to prevent an accidental - * copy of the source buffer to the tree's arena, because substr - * is implicitly convertible to csubstr. If you really intend to parse - * a mutable buffer in the tree's arena, convert it first to immutable - * by assigning the substr to a csubstr prior to calling parse_in_arena(). - * This is not needed for parse_in_place() because csubstr is not - * implicitly convertible to substr. */ -/** @{ */ +public: -/* READ THE NOTE ABOVE! 
*/ -RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) Tree parse_in_arena( substr yaml ); -RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) Tree parse_in_arena(csubstr filename, substr yaml ); -RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena( substr yaml, Tree *t ); -RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(csubstr filename, substr yaml, Tree *t ); -RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena( substr yaml, Tree *t, size_t node_id); -RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(csubstr filename, substr yaml, Tree *t, size_t node_id); -RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena( substr yaml, NodeRef node ); -RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(csubstr filename, substr yaml, NodeRef node ); - -inline Tree parse_in_arena( csubstr yaml ) { Parser np; return np.parse_in_arena({} , yaml); } //!< parse a read-only YAML source buffer, copying it first to the tree's source arena. -inline Tree parse_in_arena(csubstr filename, csubstr yaml ) { Parser np; return np.parse_in_arena(filename, yaml); } //!< parse a read-only YAML source buffer, copying it first to the tree's source arena, providing a filename for error messages. -inline void parse_in_arena( csubstr yaml, Tree *t ) { Parser np; np.parse_in_arena({} , yaml, t); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena. -inline void parse_in_arena(csubstr filename, csubstr yaml, Tree *t ) { Parser np; np.parse_in_arena(filename, yaml, t); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena, providing a filename for error messages. 
-inline void parse_in_arena( csubstr yaml, Tree *t, size_t node_id) { Parser np; np.parse_in_arena({} , yaml, t, node_id); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena. -inline void parse_in_arena(csubstr filename, csubstr yaml, Tree *t, size_t node_id) { Parser np; np.parse_in_arena(filename, yaml, t, node_id); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena, providing a filename for error messages. -inline void parse_in_arena( csubstr yaml, NodeRef node ) { Parser np; np.parse_in_arena({} , yaml, node); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena. -inline void parse_in_arena(csubstr filename, csubstr yaml, NodeRef node ) { Parser np; np.parse_in_arena(filename, yaml, node); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena, providing a filename for error messages. - -RYML_DEPRECATED("use parse_in_arena() instead") inline Tree parse( csubstr yaml ) { Parser np; return np.parse_in_arena({} , yaml); } //!< parse a read-only YAML source buffer, copying it first to the tree's source arena. -RYML_DEPRECATED("use parse_in_arena() instead") inline Tree parse(csubstr filename, csubstr yaml ) { Parser np; return np.parse_in_arena(filename, yaml); } //!< parse a read-only YAML source buffer, copying it first to the tree's source arena, providing a filename for error messages. -RYML_DEPRECATED("use parse_in_arena() instead") inline void parse( csubstr yaml, Tree *t ) { Parser np; np.parse_in_arena({} , yaml, t); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena. 
-RYML_DEPRECATED("use parse_in_arena() instead") inline void parse(csubstr filename, csubstr yaml, Tree *t ) { Parser np; np.parse_in_arena(filename, yaml, t); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena, providing a filename for error messages. -RYML_DEPRECATED("use parse_in_arena() instead") inline void parse( csubstr yaml, Tree *t, size_t node_id) { Parser np; np.parse_in_arena({} , yaml, t, node_id); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena. -RYML_DEPRECATED("use parse_in_arena() instead") inline void parse(csubstr filename, csubstr yaml, Tree *t, size_t node_id) { Parser np; np.parse_in_arena(filename, yaml, t, node_id); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena, providing a filename for error messages. -RYML_DEPRECATED("use parse_in_arena() instead") inline void parse( csubstr yaml, NodeRef node ) { Parser np; np.parse_in_arena({} , yaml, node); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena. -RYML_DEPRECATED("use parse_in_arena() instead") inline void parse(csubstr filename, csubstr yaml, NodeRef node ) { Parser np; np.parse_in_arena(filename, yaml, node); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena, providing a filename for error messages. 
+ C4_ALWAYS_INLINE bool at_line_beginning() const noexcept + { + return line_contents.rem.str == line_contents.full.str; + } + C4_ALWAYS_INLINE bool indentation_eq() const noexcept + { + RYML_ASSERT(indref != npos); + return line_contents.indentation != npos && line_contents.indentation == indref; + } + C4_ALWAYS_INLINE bool indentation_ge() const noexcept + { + RYML_ASSERT(indref != npos); + return line_contents.indentation != npos && line_contents.indentation >= indref; + } + C4_ALWAYS_INLINE bool indentation_gt() const noexcept + { + RYML_ASSERT(indref != npos); + return line_contents.indentation != npos && line_contents.indentation > indref; + } + C4_ALWAYS_INLINE bool indentation_lt() const noexcept + { + RYML_ASSERT(indref != npos); + return line_contents.indentation != npos && line_contents.indentation < indref; + } +}; +static_assert(std::is_standard_layout::value, "ParserState not standard"); -/** @} */ } // namespace yml } // namespace c4 -#if defined(_MSC_VER) -# pragma warning(pop) -#endif - -#endif /* _C4_YML_PARSE_HPP_ */ +#endif /* _C4_YML_PARSER_STATE_HPP_ */ -// (end https://github.com/biojppm/rapidyaml/src/c4/yml/parse.hpp) +// (end https://github.com/biojppm/rapidyaml/src/c4/yml/parser_state.hpp) //******************************************************************************** //-------------------------------------------------------------------------------- -// src/c4/yml/std/map.hpp -// https://github.com/biojppm/rapidyaml/src/c4/yml/std/map.hpp +// src/c4/yml/event_handler_stack.hpp +// https://github.com/biojppm/rapidyaml/src/c4/yml/event_handler_stack.hpp //-------------------------------------------------------------------------------- //******************************************************************************** -#ifndef _C4_YML_STD_MAP_HPP_ -#define _C4_YML_STD_MAP_HPP_ +#ifndef _C4_YML_EVENT_HANDLER_STACK_HPP_ +#define _C4_YML_EVENT_HANDLER_STACK_HPP_ -/** @file map.hpp write/read std::map to/from a YAML tree. 
*/ +#ifndef _C4_YML_DETAIL_STACK_HPP_ +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/detail/stack.hpp +//#include "c4/yml/detail/stack.hpp" +#if !defined(C4_YML_DETAIL_STACK_HPP_) && !defined(_C4_YML_DETAIL_STACK_HPP_) +#error "amalgamate: file c4/yml/detail/stack.hpp must have been included at this point" +#endif /* C4_YML_DETAIL_STACK_HPP_ */ +#endif + +#ifndef _C4_YML_DETAIL_PARSER_DBG_HPP_ // amalgamate: removed include of -// https://github.com/biojppm/rapidyaml/src/c4/yml/node.hpp -//#include "c4/yml/node.hpp" -#if !defined(C4_YML_NODE_HPP_) && !defined(_C4_YML_NODE_HPP_) -#error "amalgamate: file c4/yml/node.hpp must have been included at this point" -#endif /* C4_YML_NODE_HPP_ */ +// https://github.com/biojppm/rapidyaml/src/c4/yml/detail/parser_dbg.hpp +//#include "c4/yml/detail/parser_dbg.hpp" +#if !defined(C4_YML_DETAIL_PARSER_DBG_HPP_) && !defined(_C4_YML_DETAIL_PARSER_DBG_HPP_) +#error "amalgamate: file c4/yml/detail/parser_dbg.hpp must have been included at this point" +#endif /* C4_YML_DETAIL_PARSER_DBG_HPP_ */ -#include +#endif -namespace c4 { -namespace yml { +#ifndef _C4_YML_PARSER_STATE_HPP_ +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/parser_state.hpp +//#include "c4/yml/parser_state.hpp" +#if !defined(C4_YML_PARSER_STATE_HPP_) && !defined(_C4_YML_PARSER_STATE_HPP_) +#error "amalgamate: file c4/yml/parser_state.hpp must have been included at this point" +#endif /* C4_YML_PARSER_STATE_HPP_ */ -// std::map requires child nodes in the data -// tree hierarchy (a MAP node in ryml parlance). -// So it should be serialized via write()/read(). 
+#endif -template -void write(c4::yml::NodeRef *n, std::map const& m) -{ - *n |= c4::yml::MAP; - for(auto const& C4_RESTRICT p : m) - { - auto ch = n->append_child(); - ch << c4::yml::key(p.first); - ch << p.second; - } -} - -template -bool read(c4::yml::ConstNodeRef const& n, std::map * m) -{ - K k{}; - V v{}; - for(auto const& C4_RESTRICT ch : n) - { - ch >> c4::yml::key(k); - ch >> v; - m->emplace(std::make_pair(std::move(k), std::move(v))); - } - return true; -} +#ifdef RYML_DBG +#ifndef _C4_YML_DETAIL_PRINT_HPP_ +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/detail/print.hpp +//#include "c4/yml/detail/print.hpp" +#if !defined(C4_YML_DETAIL_PRINT_HPP_) && !defined(_C4_YML_DETAIL_PRINT_HPP_) +#error "amalgamate: file c4/yml/detail/print.hpp must have been included at this point" +#endif /* C4_YML_DETAIL_PRINT_HPP_ */ -} // namespace yml -} // namespace c4 +#endif +#endif -#endif // _C4_YML_STD_MAP_HPP_ +namespace c4 { +namespace yml { +/** @addtogroup doc_event_handlers + * @{ */ -// (end https://github.com/biojppm/rapidyaml/src/c4/yml/std/map.hpp) +namespace detail { +using pfn_relocate_arena = void (*)(void*, csubstr prev_arena, substr next_arena); +} // detail +/** Use this class a base of implementations of event handler to + * simplify the stack logic. 
*/ +template +struct EventHandlerStack +{ + static_assert(std::is_base_of::value, + "ParserState must be a base of HandlerState"); + using state = HandlerState; + using pfn_relocate_arena = detail::pfn_relocate_arena; -//******************************************************************************** -//-------------------------------------------------------------------------------- -// src/c4/yml/std/string.hpp -// https://github.com/biojppm/rapidyaml/src/c4/yml/std/string.hpp -//-------------------------------------------------------------------------------- -//******************************************************************************** +public: -#ifndef C4_YML_STD_STRING_HPP_ -#define C4_YML_STD_STRING_HPP_ + detail::stack m_stack; + state *C4_RESTRICT m_curr; ///< current stack level: top of the stack. cached here for easier access. + state *C4_RESTRICT m_parent; ///< parent of the current stack level. + pfn_relocate_arena m_relocate_arena; ///< callback when the arena gets relocated + void * m_relocate_arena_data; -/** @file string.hpp substring conversions for/from std::string */ +protected: -// everything we need is implemented here: -// amalgamate: removed include of -// https://github.com/biojppm/rapidyaml/src/c4/std/string.hpp -//#include -#if !defined(C4_STD_STRING_HPP_) && !defined(_C4_STD_STRING_HPP_) -#error "amalgamate: file c4/std/string.hpp must have been included at this point" -#endif /* C4_STD_STRING_HPP_ */ + EventHandlerStack() : m_stack(), m_curr(), m_parent(), m_relocate_arena(), m_relocate_arena_data() {} + EventHandlerStack(Callbacks const& cb) : m_stack(cb), m_curr(), m_parent(), m_relocate_arena(), m_relocate_arena_data() {} +protected: -#endif // C4_YML_STD_STRING_HPP_ + void _stack_start_parse(const char *filename, pfn_relocate_arena relocate_arena, void *relocate_arena_data) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, m_curr != nullptr); + _RYML_CB_ASSERT(m_stack.m_callbacks, relocate_arena != nullptr); + 
_RYML_CB_ASSERT(m_stack.m_callbacks, relocate_arena_data != nullptr); + m_curr->start_parse(filename, m_curr->node_id); + m_relocate_arena = relocate_arena; + m_relocate_arena_data = relocate_arena_data; + } + void _stack_finish_parse() + { + } -// (end https://github.com/biojppm/rapidyaml/src/c4/yml/std/string.hpp) +protected: + void _stack_reset_root() + { + m_stack.clear(); + m_stack.push({}); + m_parent = nullptr; + m_curr = &m_stack.top(); + } + void _stack_reset_non_root() + { + m_stack.clear(); + m_stack.push({}); // parent + m_stack.push({}); // node + m_parent = &m_stack.top(1); + m_curr = &m_stack.top(); + } -//******************************************************************************** -//-------------------------------------------------------------------------------- -// src/c4/yml/std/vector.hpp -// https://github.com/biojppm/rapidyaml/src/c4/yml/std/vector.hpp -//-------------------------------------------------------------------------------- -//******************************************************************************** + void _stack_push() + { + m_stack.push_top(); + m_parent = &m_stack.top(1); // don't use m_curr. watch out for relocations inside the prev push + m_curr = &m_stack.top(); + m_curr->reset_after_push(); + } -#ifndef _C4_YML_STD_VECTOR_HPP_ -#define _C4_YML_STD_VECTOR_HPP_ + void _stack_pop() + { + _RYML_CB_ASSERT(m_stack.m_callbacks, m_parent); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_stack.size() > 1); + m_parent->reset_before_pop(*m_curr); + m_stack.pop(); + m_parent = m_stack.size() > 1 ? &m_stack.top(1) : nullptr; + m_curr = &m_stack.top(); + #ifdef RYML_DBG + if(m_parent) + _c4dbgpf("popped! top is now node={} (parent={})", m_curr->node_id, m_parent->node_id); + else + _c4dbgpf("popped! 
top is now node={} @ ROOT", m_curr->node_id); + #endif + } -// amalgamate: removed include of -// https://github.com/biojppm/rapidyaml/src/c4/yml/node.hpp -//#include "c4/yml/node.hpp" -#if !defined(C4_YML_NODE_HPP_) && !defined(_C4_YML_NODE_HPP_) -#error "amalgamate: file c4/yml/node.hpp must have been included at this point" -#endif /* C4_YML_NODE_HPP_ */ +protected: -// amalgamate: removed include of -// https://github.com/biojppm/rapidyaml/src/c4/std/vector.hpp -//#include -#if !defined(C4_STD_VECTOR_HPP_) && !defined(_C4_STD_VECTOR_HPP_) -#error "amalgamate: file c4/std/vector.hpp must have been included at this point" -#endif /* C4_STD_VECTOR_HPP_ */ + // undefined at the end + #define _has_any_(bits) (static_cast(this)->template _has_any__()) -//included above: -//#include + bool _stack_should_push_on_begin_doc() const + { + const bool is_root = (m_stack.size() == 1u); + return is_root && (_has_any_(DOC|VAL|MAP|SEQ) || m_curr->has_children); + } -namespace c4 { -namespace yml { + bool _stack_should_pop_on_end_doc() const + { + const bool is_root = (m_stack.size() == 1u); + return !is_root && _has_any_(DOC); + } -// vector is a sequence-like type, and it requires child nodes -// in the data tree hierarchy (a SEQ node in ryml parlance). -// So it should be serialized via write()/read(). 
+protected: + void _stack_relocate_to_new_arena(csubstr prev, substr curr) + { + for(state &st : m_stack) + { + if(st.line_contents.rem.is_sub(prev)) + st.line_contents.rem = _stack_relocate_to_new_arena(st.line_contents.rem, prev, curr); + if(st.line_contents.full.is_sub(prev)) + st.line_contents.full = _stack_relocate_to_new_arena(st.line_contents.full, prev, curr); + if(st.line_contents.stripped.is_sub(prev)) + st.line_contents.stripped = _stack_relocate_to_new_arena(st.line_contents.stripped, prev, curr); + } + _RYML_CB_ASSERT(m_stack.m_callbacks, m_relocate_arena != nullptr); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_relocate_arena_data != nullptr); + m_relocate_arena(m_relocate_arena_data, prev, curr); + } -template -void write(c4::yml::NodeRef *n, std::vector const& vec) -{ - *n |= c4::yml::SEQ; - for(auto const& v : vec) - n->append_child() << v; -} + substr _stack_relocate_to_new_arena(csubstr s, csubstr prev, substr curr) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, prev.is_super(s)); + auto pos = s.str - prev.str; + substr out = {curr.str + pos, s.len}; + _RYML_CB_ASSERT(m_stack.m_callbacks, curr.is_super(out)); + return out; + } -template -bool read(c4::yml::ConstNodeRef const& n, std::vector *vec) -{ - vec->resize(n.num_children()); - size_t pos = 0; - for(auto const ch : n) - ch >> (*vec)[pos++]; - return true; -} +public: -/** specialization: std::vector uses std::vector::reference as - * the return value of its operator[]. */ -template -bool read(c4::yml::ConstNodeRef const& n, std::vector *vec) -{ - vec->resize(n.num_children()); - size_t pos = 0; - bool tmp; - for(auto const ch : n) + /** Check whether the current parse tokens are trailing on the + * previous doc, and raise an error if they are. This function is + * called by the parse engine (not the event handler) before a doc + * is started. 
*/ + void check_trailing_doc_token() const { - ch >> tmp; - (*vec)[pos++] = tmp; + const bool is_root = (m_stack.size() == 1u); + const bool isndoc = (m_curr->flags & NDOC) != 0; + const bool suspicious = _has_any_(MAP|SEQ|VAL); + _c4dbgpf("target={} isroot={} suspicious={} ndoc={}", m_curr->node_id, is_root, suspicious, isndoc); + if((is_root || _has_any_(DOC)) && suspicious && !isndoc) + _RYML_CB_ERR_(m_stack.m_callbacks, "parse error", m_curr->pos); } - return true; -} + +protected: + + #undef _has_any_ + +}; + +/** @} */ } // namespace yml } // namespace c4 -#endif // _C4_YML_STD_VECTOR_HPP_ +#endif /* _C4_YML_EVENT_HANDLER_STACK_HPP_ */ -// (end https://github.com/biojppm/rapidyaml/src/c4/yml/std/vector.hpp) +// (end https://github.com/biojppm/rapidyaml/src/c4/yml/event_handler_stack.hpp) //******************************************************************************** //-------------------------------------------------------------------------------- -// src/c4/yml/std/std.hpp -// https://github.com/biojppm/rapidyaml/src/c4/yml/std/std.hpp +// src/c4/yml/event_handler_tree.hpp +// https://github.com/biojppm/rapidyaml/src/c4/yml/event_handler_tree.hpp //-------------------------------------------------------------------------------- //******************************************************************************** -#ifndef _C4_YML_STD_STD_HPP_ -#define _C4_YML_STD_STD_HPP_ +#ifndef _C4_YML_EVENT_HANDLER_TREE_HPP_ +#define _C4_YML_EVENT_HANDLER_TREE_HPP_ +#ifndef _C4_YML_TREE_HPP_ // amalgamate: removed include of -// https://github.com/biojppm/rapidyaml/src/c4/yml/std/string.hpp -//#include "c4/yml/std/string.hpp" -#if !defined(C4_YML_STD_STRING_HPP_) && !defined(_C4_YML_STD_STRING_HPP_) -#error "amalgamate: file c4/yml/std/string.hpp must have been included at this point" -#endif /* C4_YML_STD_STRING_HPP_ */ +// https://github.com/biojppm/rapidyaml/src/c4/yml/tree.hpp +//#include "c4/yml/tree.hpp" +#if !defined(C4_YML_TREE_HPP_) && !defined(_C4_YML_TREE_HPP_) 
+#error "amalgamate: file c4/yml/tree.hpp must have been included at this point" +#endif /* C4_YML_TREE_HPP_ */ -// amalgamate: removed include of -// https://github.com/biojppm/rapidyaml/src/c4/yml/std/vector.hpp -//#include "c4/yml/std/vector.hpp" -#if !defined(C4_YML_STD_VECTOR_HPP_) && !defined(_C4_YML_STD_VECTOR_HPP_) -#error "amalgamate: file c4/yml/std/vector.hpp must have been included at this point" -#endif /* C4_YML_STD_VECTOR_HPP_ */ +#endif +#ifndef _C4_YML_EVENT_HANDLER_STACK_HPP_ // amalgamate: removed include of -// https://github.com/biojppm/rapidyaml/src/c4/yml/std/map.hpp -//#include "c4/yml/std/map.hpp" -#if !defined(C4_YML_STD_MAP_HPP_) && !defined(_C4_YML_STD_MAP_HPP_) -#error "amalgamate: file c4/yml/std/map.hpp must have been included at this point" -#endif /* C4_YML_STD_MAP_HPP_ */ +// https://github.com/biojppm/rapidyaml/src/c4/yml/event_handler_stack.hpp +//#include "c4/yml/event_handler_stack.hpp" +#if !defined(C4_YML_EVENT_HANDLER_STACK_HPP_) && !defined(_C4_YML_EVENT_HANDLER_STACK_HPP_) +#error "amalgamate: file c4/yml/event_handler_stack.hpp must have been included at this point" +#endif /* C4_YML_EVENT_HANDLER_STACK_HPP_ */ +#endif -#endif // _C4_YML_STD_STD_HPP_ +C4_SUPPRESS_WARNING_MSVC_WITH_PUSH(4702) // unreachable code +namespace c4 { +namespace yml { -// (end https://github.com/biojppm/rapidyaml/src/c4/yml/std/std.hpp) +/** @addtogroup doc_event_handlers + * @{ */ +/** The stack state needed specifically by @ref EventHandlerTree */ +struct EventHandlerTreeState : public ParserState +{ + NodeData *tr_data; +}; -//******************************************************************************** -//-------------------------------------------------------------------------------- -// src/c4/yml/common.cpp -// https://github.com/biojppm/rapidyaml/src/c4/yml/common.cpp -//-------------------------------------------------------------------------------- -//******************************************************************************** 
-#ifdef RYML_SINGLE_HDR_DEFINE_NOW -// amalgamate: removed include of -// https://github.com/biojppm/rapidyaml/src/c4/yml/common.hpp -//#include "c4/yml/common.hpp" -#if !defined(C4_YML_COMMON_HPP_) && !defined(_C4_YML_COMMON_HPP_) -#error "amalgamate: file c4/yml/common.hpp must have been included at this point" -#endif /* C4_YML_COMMON_HPP_ */ +/** The event handler to create a ryml @ref Tree. See the + * documentation for @ref doc_event_handlers, which has important + * notes about the event model used by rapidyaml. */ +struct EventHandlerTree : public EventHandlerStack +{ + /** @name types + * @{ */ -#ifndef RYML_NO_DEFAULT_CALLBACKS -//included above: -//# include -//included above: -//# include -#endif // RYML_NO_DEFAULT_CALLBACKS + using state = EventHandlerTreeState; -namespace c4 { -namespace yml { + /** @} */ -namespace { -Callbacks s_default_callbacks; -} // anon namespace +public: -#ifndef RYML_NO_DEFAULT_CALLBACKS -void report_error_impl(const char* msg, size_t length, Location loc, FILE *f) -{ - if(!f) - f = stderr; - if(loc) + /** @cond dev */ + Tree *C4_RESTRICT m_tree; + id_type m_id; + size_t m_num_directives; + bool m_yaml_directive; + + #if RYML_DBG + #define _enable_(bits) _enable__(); _c4dbgpf("node[{}]: enable {}", m_curr->node_id, #bits) + #define _disable_(bits) _disable__(); _c4dbgpf("node[{}]: disable {}", m_curr->node_id, #bits) + #else + #define _enable_(bits) _enable__() + #define _disable_(bits) _disable__() + #endif + #define _has_any_(bits) _has_any__() + /** @endcond */ + +public: + + /** @name construction and resetting + * @{ */ + + EventHandlerTree() : EventHandlerStack(), m_tree(), m_id(NONE), m_num_directives(), m_yaml_directive() {} + EventHandlerTree(Callbacks const& cb) : EventHandlerStack(cb), m_tree(), m_id(NONE), m_num_directives(), m_yaml_directive() {} + EventHandlerTree(Tree *tree, id_type id) : EventHandlerStack(tree->callbacks()), m_tree(tree), m_id(id), m_num_directives(), m_yaml_directive() { - 
if(!loc.name.empty()) + reset(tree, id); + } + + void reset(Tree *tree, id_type id) + { + if(C4_UNLIKELY(!tree)) + _RYML_CB_ERR(m_stack.m_callbacks, "null tree"); + if(C4_UNLIKELY(id >= tree->capacity())) + _RYML_CB_ERR(tree->callbacks(), "invalid node"); + if(C4_UNLIKELY(!tree->is_root(id))) + if(C4_UNLIKELY(tree->is_map(tree->parent(id)))) + if(C4_UNLIKELY(!tree->has_key(id))) + _RYML_CB_ERR(tree->callbacks(), "destination node belongs to a map and has no key"); + m_tree = tree; + m_id = id; + if(m_tree->is_root(id)) { - fwrite(loc.name.str, 1, loc.name.len, f); - fputc(':', f); + _stack_reset_root(); + _reset_parser_state(m_curr, id, m_tree->root_id()); } - fprintf(f, "%zu:", loc.line); - if(loc.col) - fprintf(f, "%zu:", loc.col); - if(loc.offset) - fprintf(f, " (%zuB):", loc.offset); + else + { + _stack_reset_non_root(); + _reset_parser_state(m_parent, id, m_tree->parent(id)); + _reset_parser_state(m_curr, id, id); + } + m_num_directives = 0; + m_yaml_directive = false; } - fprintf(f, "%.*s\n", (int)length, msg); - fflush(f); -} -void error_impl(const char* msg, size_t length, Location loc, void * /*user_data*/) -{ - report_error_impl(msg, length, loc, nullptr); - ::abort(); -} + /** @} */ -void* allocate_impl(size_t length, void * /*hint*/, void * /*user_data*/) -{ - void *mem = ::malloc(length); - if(mem == nullptr) +public: + + /** @name parse events + * @{ */ + + void start_parse(const char* filename, detail::pfn_relocate_arena relocate_arena, void *relocate_arena_data) { - const char msg[] = "could not allocate memory"; - error_impl(msg, sizeof(msg)-1, {}, nullptr); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree != nullptr); + this->_stack_start_parse(filename, relocate_arena, relocate_arena_data); } - return mem; -} -void free_impl(void *mem, size_t /*length*/, void * /*user_data*/) -{ - ::free(mem); -} -#endif // RYML_NO_DEFAULT_CALLBACKS + void finish_parse() + { + _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree != nullptr); + if(m_num_directives && 
!m_tree->is_stream(m_tree->root_id())) + _RYML_CB_ERR_(m_stack.m_callbacks, "directives cannot be used without a document", {}); + this->_stack_finish_parse(); + /* This pointer is temporary. Remember that: + * + * - this handler object may be held by the user + * - it may be used with a temporary tree inside the parse function + * - when the parse function returns the temporary tree, its address + * will change + * + * As a result, the user could try to read the tree from m_tree, and + * end up reading the stale temporary object. + * + * So it is better to clear it here; then the user will get an obvious + * segfault if reading from m_tree. */ + m_tree = nullptr; + } + void cancel_parse() + { + m_tree = nullptr; + } + /** @} */ -Callbacks::Callbacks() - : - m_user_data(nullptr), - #ifndef RYML_NO_DEFAULT_CALLBACKS - m_allocate(allocate_impl), - m_free(free_impl), - m_error(error_impl) - #else - m_allocate(nullptr), - m_free(nullptr), - m_error(nullptr) - #endif -{ -} +public: -Callbacks::Callbacks(void *user_data, pfn_allocate alloc_, pfn_free free_, pfn_error error_) - : - m_user_data(user_data), - #ifndef RYML_NO_DEFAULT_CALLBACKS - m_allocate(alloc_ ? alloc_ : allocate_impl), - m_free(free_ ? free_ : free_impl), - m_error(error_ ? 
error_ : error_impl) - #else - m_allocate(alloc_), - m_free(free_), - m_error(error_) - #endif -{ - C4_CHECK(m_allocate); - C4_CHECK(m_free); - C4_CHECK(m_error); -} + /** @name YAML stream events */ + /** @{ */ + C4_ALWAYS_INLINE void begin_stream() const noexcept { /*nothing to do*/ } -void set_callbacks(Callbacks const& c) -{ - s_default_callbacks = c; -} + C4_ALWAYS_INLINE void end_stream() const noexcept { /*nothing to do*/ } -Callbacks const& get_callbacks() -{ - return s_default_callbacks; -} + /** @} */ -void reset_callbacks() -{ - set_callbacks(Callbacks()); -} +public: -void error(const char *msg, size_t msg_len, Location loc) -{ - s_default_callbacks.m_error(msg, msg_len, loc, s_default_callbacks.m_user_data); -} + /** @name YAML document events */ + /** @{ */ -} // namespace yml -} // namespace c4 + /** implicit doc start (without ---) */ + void begin_doc() + { + _c4dbgp("begin_doc"); + if(_stack_should_push_on_begin_doc()) + { + _c4dbgp("push!"); + _set_root_as_stream(); + _push(); + _enable_(DOC); + } + } + /** implicit doc end (without ...) 
*/ + void end_doc() + { + _c4dbgp("end_doc"); + if(_stack_should_pop_on_end_doc()) + { + _remove_speculative(); + _c4dbgp("pop!"); + _pop(); + } + } -#endif /* RYML_SINGLE_HDR_DEFINE_NOW */ + /** explicit doc start, with --- */ + void begin_doc_expl() + { + _c4dbgp("begin_doc_expl"); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->root_id() == m_curr->node_id); + if(!m_tree->is_stream(m_tree->root_id())) //if(_should_push_on_begin_doc()) + { + _c4dbgp("ensure stream"); + _set_root_as_stream(); + id_type first = m_tree->first_child(m_tree->root_id()); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->is_stream(m_tree->root_id())); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->num_children(m_tree->root_id()) == 1u); + if(m_tree->has_children(first) || m_tree->is_val(first)) + { + _c4dbgp("push!"); + _push(); + } + else + { + _c4dbgp("tweak"); + _push(); + _remove_speculative(); + m_curr->node_id = m_tree->last_child(m_tree->root_id()); + m_curr->tr_data = m_tree->_p(m_curr->node_id); + } + } + else + { + _c4dbgp("push!"); + _push(); + } + _enable_(DOC); + } + /** explicit doc end, with ... 
*/ + void end_doc_expl() + { + _c4dbgp("end_doc_expl"); + _remove_speculative(); + if(_stack_should_pop_on_end_doc()) + { + _c4dbgp("pop!"); + _pop(); + } + m_yaml_directive = false; + } + /** @} */ -// (end https://github.com/biojppm/rapidyaml/src/c4/yml/common.cpp) +public: + /** @name YAML map events */ + /** @{ */ + void begin_map_key_flow() + { + _RYML_CB_ERR_(m_stack.m_callbacks, "ryml trees cannot handle containers as keys", m_curr->pos); + } + void begin_map_key_block() + { + _RYML_CB_ERR_(m_stack.m_callbacks, "ryml trees cannot handle containers as keys", m_curr->pos); + } -//******************************************************************************** -//-------------------------------------------------------------------------------- -// src/c4/yml/tree.cpp -// https://github.com/biojppm/rapidyaml/src/c4/yml/tree.cpp -//-------------------------------------------------------------------------------- -//******************************************************************************** + void begin_map_val_flow() + { + _c4dbgpf("node[{}]: begin_map_val_flow", m_curr->node_id); + _RYML_CB_CHECK(m_stack.m_callbacks, !_has_any_(VAL)); + _enable_(MAP|FLOW_SL); + _save_loc(); + _push(); + } + void begin_map_val_block() + { + _c4dbgpf("node[{}]: begin_map_val_block", m_curr->node_id); + _RYML_CB_CHECK(m_stack.m_callbacks, !_has_any_(VAL)); + _enable_(MAP|BLOCK); + _save_loc(); + _push(); + } -#ifdef RYML_SINGLE_HDR_DEFINE_NOW -// amalgamate: removed include of -// https://github.com/biojppm/rapidyaml/src/c4/yml/tree.hpp -//#include "c4/yml/tree.hpp" -#if !defined(C4_YML_TREE_HPP_) && !defined(_C4_YML_TREE_HPP_) -#error "amalgamate: file c4/yml/tree.hpp must have been included at this point" -#endif /* C4_YML_TREE_HPP_ */ + void end_map() + { + _pop(); + _c4dbgpf("node[{}]: end_map_val", m_curr->node_id); + } -// amalgamate: removed include of -// https://github.com/biojppm/rapidyaml/src/c4/yml/detail/parser_dbg.hpp -//#include "c4/yml/detail/parser_dbg.hpp" -#if 
!defined(C4_YML_DETAIL_PARSER_DBG_HPP_) && !defined(_C4_YML_DETAIL_PARSER_DBG_HPP_) -#error "amalgamate: file c4/yml/detail/parser_dbg.hpp must have been included at this point" -#endif /* C4_YML_DETAIL_PARSER_DBG_HPP_ */ + /** @} */ -// amalgamate: removed include of -// https://github.com/biojppm/rapidyaml/src/c4/yml/node.hpp -//#include "c4/yml/node.hpp" -#if !defined(C4_YML_NODE_HPP_) && !defined(_C4_YML_NODE_HPP_) -#error "amalgamate: file c4/yml/node.hpp must have been included at this point" -#endif /* C4_YML_NODE_HPP_ */ +public: -// amalgamate: removed include of -// https://github.com/biojppm/rapidyaml/src/c4/yml/detail/stack.hpp -//#include "c4/yml/detail/stack.hpp" -#if !defined(C4_YML_DETAIL_STACK_HPP_) && !defined(_C4_YML_DETAIL_STACK_HPP_) -#error "amalgamate: file c4/yml/detail/stack.hpp must have been included at this point" -#endif /* C4_YML_DETAIL_STACK_HPP_ */ + /** @name YAML seq events */ + /** @{ */ + void begin_seq_key_flow() + { + _RYML_CB_ERR_(m_stack.m_callbacks, "ryml trees cannot handle containers as keys", m_curr->pos); + } + void begin_seq_key_block() + { + _RYML_CB_ERR_(m_stack.m_callbacks, "ryml trees cannot handle containers as keys", m_curr->pos); + } + void begin_seq_val_flow() + { + _c4dbgpf("node[{}]: begin_seq_val_flow", m_curr->node_id); + _RYML_CB_CHECK(m_stack.m_callbacks, !_has_any_(VAL)); + _enable_(SEQ|FLOW_SL); + _save_loc(); + _push(); + } + void begin_seq_val_block() + { + _c4dbgpf("node[{}]: begin_seq_val_block", m_curr->node_id); + _RYML_CB_CHECK(m_stack.m_callbacks, !_has_any_(VAL)); + _enable_(SEQ|BLOCK); + _save_loc(); + _push(); + } -C4_SUPPRESS_WARNING_GCC_WITH_PUSH("-Wtype-limits") -C4_SUPPRESS_WARNING_MSVC_WITH_PUSH(4296/*expression is always 'boolean_value'*/) + void end_seq() + { + _pop(); + _c4dbgpf("node[{}]: end_seq_val", m_curr->node_id); + } -namespace c4 { -namespace yml { + /** @} */ +public: -csubstr normalize_tag(csubstr tag) -{ - YamlTag_e t = to_tag(tag); - if(t != TAG_NONE) - return from_tag(t); 
- if(tag.begins_with("!<")) - tag = tag.sub(1); - if(tag.begins_with("has_children(m_parent->node_id)); + NodeData const* prev = m_tree->m_buf; // watchout against relocation of the tree nodes + _set_state_(m_curr, m_tree->_append_child__unprotected(m_parent->node_id)); + if(prev != m_tree->m_buf) + _refresh_after_relocation(); + _c4dbgpf("node[{}]: added sibling={} prev={}", m_parent->node_id, m_curr->node_id, m_tree->prev_sibling(m_curr->node_id)); + } -YamlTag_e to_tag(csubstr tag) -{ - if(tag.begins_with("!<")) - tag = tag.sub(1); - if(tag.begins_with("!!")) - tag = tag.sub(2); - else if(tag.begins_with('!')) - return TAG_NONE; - else if(tag.begins_with("tag:yaml.org,2002:")) + /** set the previous val as the first key of a new map, with flow style. + * + * See the documentation for @ref doc_event_handlers, which has + * important notes about this event. + */ + void actually_val_is_first_key_of_new_map_flow() + { + if(C4_UNLIKELY(m_tree->is_container(m_curr->node_id))) + _RYML_CB_ERR_(m_stack.m_callbacks, "ryml trees cannot handle containers as keys", m_curr->pos); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_parent); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->is_seq(m_parent->node_id)); + _RYML_CB_ASSERT(m_stack.m_callbacks, !m_tree->is_container(m_curr->node_id)); + _RYML_CB_ASSERT(m_stack.m_callbacks, !m_tree->has_key(m_curr->node_id)); + const NodeData tmp = _val2key_(*m_curr->tr_data); + _disable_(_VALMASK|VAL_STYLE); + m_curr->tr_data->m_val = {}; + begin_map_val_flow(); + m_curr->tr_data->m_type = tmp.m_type; + m_curr->tr_data->m_key = tmp.m_key; + } + + /** like its flow counterpart, but this function can only be + * called after the end of a flow-val at root or doc level. + * + * See the documentation for @ref doc_event_handlers, which has + * important notes about this event. 
+ */ + void actually_val_is_first_key_of_new_map_block() { - RYML_ASSERT(csubstr("tag:yaml.org,2002:").len == 18); - tag = tag.sub(18); + _RYML_CB_ERR_(m_stack.m_callbacks, "ryml trees cannot handle containers as keys", m_curr->pos); } - else if(tag.begins_with("node_id, scalar.len, scalar, reinterpret_cast(scalar.str)); + m_curr->tr_data->m_key.scalar = scalar; + _enable_(KEY|KEY_PLAIN); + } + C4_ALWAYS_INLINE void set_val_scalar_plain(csubstr scalar) noexcept + { + _c4dbgpf("node[{}]: set val scalar plain: [{}]~~~{}~~~ ({})", m_curr->node_id, scalar.len, scalar, reinterpret_cast(scalar.str)); + m_curr->tr_data->m_val.scalar = scalar; + _enable_(VAL|VAL_PLAIN); } - if(tag == "map") - return TAG_MAP; - else if(tag == "omap") - return TAG_OMAP; - else if(tag == "pairs") - return TAG_PAIRS; - else if(tag == "set") - return TAG_SET; - else if(tag == "seq") - return TAG_SEQ; - else if(tag == "binary") - return TAG_BINARY; - else if(tag == "bool") - return TAG_BOOL; - else if(tag == "float") - return TAG_FLOAT; - else if(tag == "int") - return TAG_INT; - else if(tag == "merge") - return TAG_MERGE; - else if(tag == "null") - return TAG_NULL; - else if(tag == "str") - return TAG_STR; - else if(tag == "timestamp") - return TAG_TIMESTAMP; - else if(tag == "value") - return TAG_VALUE; - return TAG_NONE; -} + C4_ALWAYS_INLINE void set_key_scalar_dquoted(csubstr scalar) noexcept + { + _c4dbgpf("node[{}]: set key scalar dquot: [{}]~~~{}~~~ ({})", m_curr->node_id, scalar.len, scalar, reinterpret_cast(scalar.str)); + m_curr->tr_data->m_key.scalar = scalar; + _enable_(KEY|KEY_DQUO); + } + C4_ALWAYS_INLINE void set_val_scalar_dquoted(csubstr scalar) noexcept + { + _c4dbgpf("node[{}]: set val scalar dquot: [{}]~~~{}~~~ ({})", m_curr->node_id, scalar.len, scalar, reinterpret_cast(scalar.str)); + m_curr->tr_data->m_val.scalar = scalar; + _enable_(VAL|VAL_DQUO); + } -csubstr from_tag_long(YamlTag_e tag) -{ - switch(tag) + + C4_ALWAYS_INLINE void set_key_scalar_squoted(csubstr scalar) 
noexcept { - case TAG_MAP: - return {""}; - case TAG_OMAP: - return {""}; - case TAG_PAIRS: - return {""}; - case TAG_SET: - return {""}; - case TAG_SEQ: - return {""}; - case TAG_BINARY: - return {""}; - case TAG_BOOL: - return {""}; - case TAG_FLOAT: - return {""}; - case TAG_INT: - return {""}; - case TAG_MERGE: - return {""}; - case TAG_NULL: - return {""}; - case TAG_STR: - return {""}; - case TAG_TIMESTAMP: - return {""}; - case TAG_VALUE: - return {""}; - case TAG_YAML: - return {""}; - case TAG_NONE: - return {""}; + _c4dbgpf("node[{}]: set key scalar squot: [{}]~~~{}~~~ ({})", m_curr->node_id, scalar.len, scalar, reinterpret_cast(scalar.str)); + m_curr->tr_data->m_key.scalar = scalar; + _enable_(KEY|KEY_SQUO); } - return {""}; -} - -csubstr from_tag(YamlTag_e tag) -{ - switch(tag) + C4_ALWAYS_INLINE void set_val_scalar_squoted(csubstr scalar) noexcept { - case TAG_MAP: - return {"!!map"}; - case TAG_OMAP: - return {"!!omap"}; - case TAG_PAIRS: - return {"!!pairs"}; - case TAG_SET: - return {"!!set"}; - case TAG_SEQ: - return {"!!seq"}; - case TAG_BINARY: - return {"!!binary"}; - case TAG_BOOL: - return {"!!bool"}; - case TAG_FLOAT: - return {"!!float"}; - case TAG_INT: - return {"!!int"}; - case TAG_MERGE: - return {"!!merge"}; - case TAG_NULL: - return {"!!null"}; - case TAG_STR: - return {"!!str"}; - case TAG_TIMESTAMP: - return {"!!timestamp"}; - case TAG_VALUE: - return {"!!value"}; - case TAG_YAML: - return {"!!yaml"}; - case TAG_NONE: - return {""}; + _c4dbgpf("node[{}]: set val scalar squot: [{}]~~~{}~~~ ({})", m_curr->node_id, scalar.len, scalar, reinterpret_cast(scalar.str)); + m_curr->tr_data->m_val.scalar = scalar; + _enable_(VAL|VAL_SQUO); } - return {""}; -} -//----------------------------------------------------------------------------- -//----------------------------------------------------------------------------- -//----------------------------------------------------------------------------- + C4_ALWAYS_INLINE void 
set_key_scalar_literal(csubstr scalar) noexcept + { + _c4dbgpf("node[{}]: set key scalar literal: [{}]~~~{}~~~ ({})", m_curr->node_id, scalar.len, scalar, reinterpret_cast(scalar.str)); + m_curr->tr_data->m_key.scalar = scalar; + _enable_(KEY|KEY_LITERAL); + } + C4_ALWAYS_INLINE void set_val_scalar_literal(csubstr scalar) noexcept + { + _c4dbgpf("node[{}]: set val scalar literal: [{}]~~~{}~~~ ({})", m_curr->node_id, scalar.len, scalar, reinterpret_cast(scalar.str)); + m_curr->tr_data->m_val.scalar = scalar; + _enable_(VAL|VAL_LITERAL); + } -const char* NodeType::type_str(NodeType_e ty) -{ - switch(ty & _TYMASK) + + C4_ALWAYS_INLINE void set_key_scalar_folded(csubstr scalar) noexcept { - case KEYVAL: - return "KEYVAL"; - case KEY: - return "KEY"; - case VAL: - return "VAL"; - case MAP: - return "MAP"; - case SEQ: - return "SEQ"; - case KEYMAP: - return "KEYMAP"; - case KEYSEQ: - return "KEYSEQ"; - case DOCSEQ: - return "DOCSEQ"; - case DOCMAP: - return "DOCMAP"; - case DOCVAL: - return "DOCVAL"; - case DOC: - return "DOC"; - case STREAM: - return "STREAM"; - case NOTYPE: - return "NOTYPE"; - default: - if((ty & KEYVAL) == KEYVAL) - return "KEYVAL***"; - if((ty & KEYMAP) == KEYMAP) - return "KEYMAP***"; - if((ty & KEYSEQ) == KEYSEQ) - return "KEYSEQ***"; - if((ty & DOCSEQ) == DOCSEQ) - return "DOCSEQ***"; - if((ty & DOCMAP) == DOCMAP) - return "DOCMAP***"; - if((ty & DOCVAL) == DOCVAL) - return "DOCVAL***"; - if(ty & KEY) - return "KEY***"; - if(ty & VAL) - return "VAL***"; - if(ty & MAP) - return "MAP***"; - if(ty & SEQ) - return "SEQ***"; - if(ty & DOC) - return "DOC***"; - return "(unk)"; + _c4dbgpf("node[{}]: set key scalar folded: [{}]~~~{}~~~ ({})", m_curr->node_id, scalar.len, scalar, reinterpret_cast(scalar.str)); + m_curr->tr_data->m_key.scalar = scalar; + _enable_(KEY|KEY_FOLDED); + } + C4_ALWAYS_INLINE void set_val_scalar_folded(csubstr scalar) noexcept + { + _c4dbgpf("node[{}]: set val scalar folded: [{}]~~~{}~~~ ({})", m_curr->node_id, scalar.len, 
scalar, reinterpret_cast(scalar.str)); + m_curr->tr_data->m_val.scalar = scalar; + _enable_(VAL|VAL_FOLDED); } -} -//----------------------------------------------------------------------------- -//----------------------------------------------------------------------------- -//----------------------------------------------------------------------------- + C4_ALWAYS_INLINE void mark_key_scalar_unfiltered() noexcept + { + _enable_(KEY_UNFILT); + } + C4_ALWAYS_INLINE void mark_val_scalar_unfiltered() noexcept + { + _enable_(VAL_UNFILT); + } -NodeRef Tree::rootref() -{ - return NodeRef(this, root_id()); -} -ConstNodeRef Tree::rootref() const -{ - return ConstNodeRef(this, root_id()); -} + /** @} */ -ConstNodeRef Tree::crootref() -{ - return ConstNodeRef(this, root_id()); -} -ConstNodeRef Tree::crootref() const -{ - return ConstNodeRef(this, root_id()); -} +public: -NodeRef Tree::ref(size_t id) -{ - _RYML_CB_ASSERT(m_callbacks, id != NONE && id >= 0 && id < m_size); - return NodeRef(this, id); -} -ConstNodeRef Tree::ref(size_t id) const -{ - _RYML_CB_ASSERT(m_callbacks, id != NONE && id >= 0 && id < m_size); - return ConstNodeRef(this, id); -} + /** @name YAML anchor/reference events */ + /** @{ */ -ConstNodeRef Tree::cref(size_t id) -{ - _RYML_CB_ASSERT(m_callbacks, id != NONE && id >= 0 && id < m_size); - return ConstNodeRef(this, id); -} -ConstNodeRef Tree::cref(size_t id) const -{ - _RYML_CB_ASSERT(m_callbacks, id != NONE && id >= 0 && id < m_size); - return ConstNodeRef(this, id); -} + void set_key_anchor(csubstr anchor) + { + _c4dbgpf("node[{}]: set key anchor: [{}]~~~{}~~~", m_curr->node_id, anchor.len, anchor); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree); + if(C4_UNLIKELY(_has_any_(KEYREF))) + _RYML_CB_ERR_(m_tree->callbacks(), "key cannot have both anchor and ref", m_curr->pos); + _RYML_CB_ASSERT(m_tree->callbacks(), !anchor.begins_with('&')); + _enable_(KEYANCH); + m_curr->tr_data->m_key.anchor = anchor; + } + void set_val_anchor(csubstr anchor) + { + 
_c4dbgpf("node[{}]: set val anchor: [{}]~~~{}~~~", m_curr->node_id, anchor.len, anchor); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree); + if(C4_UNLIKELY(_has_any_(VALREF))) + _RYML_CB_ERR_(m_tree->callbacks(), "val cannot have both anchor and ref", m_curr->pos); + _RYML_CB_ASSERT(m_tree->callbacks(), !anchor.begins_with('&')); + _enable_(VALANCH); + m_curr->tr_data->m_val.anchor = anchor; + } -NodeRef Tree::operator[] (csubstr key) -{ - return rootref()[key]; -} -ConstNodeRef Tree::operator[] (csubstr key) const -{ - return rootref()[key]; -} + void set_key_ref(csubstr ref) + { + _c4dbgpf("node[{}]: set key ref: [{}]~~~{}~~~", m_curr->node_id, ref.len, ref); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree); + if(C4_UNLIKELY(_has_any_(KEYANCH))) + _RYML_CB_ERR_(m_tree->callbacks(), "key cannot have both anchor and ref", m_curr->pos); + _RYML_CB_ASSERT(m_tree->callbacks(), ref.begins_with('*')); + _enable_(KEY|KEYREF); + m_curr->tr_data->m_key.anchor = ref.sub(1); + m_curr->tr_data->m_key.scalar = ref; + } + void set_val_ref(csubstr ref) + { + _c4dbgpf("node[{}]: set val ref: [{}]~~~{}~~~", m_curr->node_id, ref.len, ref); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree); + if(C4_UNLIKELY(_has_any_(VALANCH))) + _RYML_CB_ERR_(m_tree->callbacks(), "val cannot have both anchor and ref", m_curr->pos); + _RYML_CB_ASSERT(m_tree->callbacks(), ref.begins_with('*')); + _enable_(VAL|VALREF); + m_curr->tr_data->m_val.anchor = ref.sub(1); + m_curr->tr_data->m_val.scalar = ref; + } -NodeRef Tree::operator[] (size_t i) -{ - return rootref()[i]; -} -ConstNodeRef Tree::operator[] (size_t i) const -{ - return rootref()[i]; -} + /** @} */ -NodeRef Tree::docref(size_t i) -{ - return ref(doc(i)); -} -ConstNodeRef Tree::docref(size_t i) const -{ - return cref(doc(i)); -} +public: + /** @name YAML tag events */ + /** @{ */ -//----------------------------------------------------------------------------- -Tree::Tree(Callbacks const& cb) - : m_buf(nullptr) - , m_cap(0) - , m_size(0) - , 
m_free_head(NONE) - , m_free_tail(NONE) - , m_arena() - , m_arena_pos(0) - , m_callbacks(cb) -{ -} + void set_key_tag(csubstr tag) noexcept + { + _c4dbgpf("node[{}]: set key tag: [{}]~~~{}~~~", m_curr->node_id, tag.len, tag); + _enable_(KEYTAG); + m_curr->tr_data->m_key.tag = tag; + } + void set_val_tag(csubstr tag) noexcept + { + _c4dbgpf("node[{}]: set val tag: [{}]~~~{}~~~", m_curr->node_id, tag.len, tag); + _enable_(VALTAG); + m_curr->tr_data->m_val.tag = tag; + } -Tree::Tree(size_t node_capacity, size_t arena_capacity, Callbacks const& cb) - : Tree(cb) -{ - reserve(node_capacity); - reserve_arena(arena_capacity); -} + /** @} */ -Tree::~Tree() -{ - _free(); -} +public: + /** @name YAML directive events */ + /** @{ */ -Tree::Tree(Tree const& that) noexcept : Tree(that.m_callbacks) -{ - _copy(that); -} + C4_NO_INLINE void add_directive(csubstr directive) + { + _c4dbgpf("% directive! {}", directive); + _RYML_CB_ASSERT(m_tree->callbacks(), directive.begins_with('%')); + if(directive.begins_with("%TAG")) + { + if(C4_UNLIKELY(!m_tree->add_tag_directive(directive))) + _RYML_CB_ERR_(m_stack.m_callbacks, "failed to add directive", m_curr->pos); + } + else if(directive.begins_with("%YAML")) + { + _c4dbgpf("%YAML directive! ignoring...: {}", directive); + if(C4_UNLIKELY(m_yaml_directive)) + _RYML_CB_ERR_(m_stack.m_callbacks, "multiple yaml directives", m_curr->pos); + m_yaml_directive = true; + } + else + { + _c4dbgpf("unknown directive! ignoring... 
{}", directive); + } + ++m_num_directives; + } -Tree& Tree::operator= (Tree const& that) noexcept -{ - _free(); - m_callbacks = that.m_callbacks; - _copy(that); - return *this; -} + /** @} */ -Tree::Tree(Tree && that) noexcept : Tree(that.m_callbacks) -{ - _move(that); -} +public: -Tree& Tree::operator= (Tree && that) noexcept -{ - _free(); - m_callbacks = that.m_callbacks; - _move(that); - return *this; -} + /** @name arena functions */ + /** @{ */ -void Tree::_free() -{ - if(m_buf) + substr alloc_arena(size_t len) { - _RYML_CB_ASSERT(m_callbacks, m_cap > 0); - _RYML_CB_FREE(m_callbacks, m_buf, NodeData, m_cap); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree); + csubstr prev = m_tree->arena(); + substr out = m_tree->alloc_arena(len); + substr curr = m_tree->arena(); + if(curr.str != prev.str) + _stack_relocate_to_new_arena(prev, curr); + return out; } - if(m_arena.str) + + substr alloc_arena(size_t len, substr *relocated) { - _RYML_CB_ASSERT(m_callbacks, m_arena.len > 0); - _RYML_CB_FREE(m_callbacks, m_arena.str, char, m_arena.len); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree); + csubstr prev = m_tree->arena(); + if(!prev.is_super(*relocated)) + return alloc_arena(len); + substr out = alloc_arena(len); + substr curr = m_tree->arena(); + if(curr.str != prev.str) + *relocated = _stack_relocate_to_new_arena(*relocated, prev, curr); + return out; } - _clear(); -} + /** @} */ -C4_SUPPRESS_WARNING_GCC_PUSH -#if defined(__GNUC__) && __GNUC__>= 8 - C4_SUPPRESS_WARNING_GCC_WITH_PUSH("-Wclass-memaccess") // error: ‘void* memset(void*, int, size_t)’ clearing an object of type ‘class c4::yml::Tree’ with no trivial copy-assignment; use assignment or value-initialization instead -#endif +public: -void Tree::_clear() -{ - m_buf = nullptr; - m_cap = 0; - m_size = 0; - m_free_head = 0; - m_free_tail = 0; - m_arena = {}; - m_arena_pos = 0; - for(size_t i = 0; i < RYML_MAX_TAG_DIRECTIVES; ++i) - m_tag_directives[i] = {}; -} + /** @cond dev */ + void _reset_parser_state(state* st, 
id_type parse_root, id_type node) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree); + _set_state_(st, node); + const NodeType type = m_tree->type(node); + #ifdef RYML_DBG + char flagbuf[80]; + _c4dbgpf("resetting state: initial flags={}", detail::_parser_flags_to_str(flagbuf, st->flags)); + #endif + if(type == NOTYPE) + { + _c4dbgpf("node[{}] is notype", node); + if(m_tree->is_root(parse_root)) + { + _c4dbgpf("node[{}] is root", node); + st->flags |= RUNK|RTOP; + } + else + { + _c4dbgpf("node[{}] is not root. setting USTY", node); + st->flags |= USTY; + } + } + else if(type.is_map()) + { + _c4dbgpf("node[{}] is map", node); + st->flags |= RMAP|USTY; + } + else if(type.is_seq()) + { + _c4dbgpf("node[{}] is map", node); + st->flags |= RSEQ|USTY; + } + else if(type.has_key()) + { + _c4dbgpf("node[{}] has key. setting USTY", node); + st->flags |= USTY; + } + else + { + _RYML_CB_ERR(m_tree->callbacks(), "cannot append to node"); + } + if(type.is_doc()) + { + _c4dbgpf("node[{}] is doc", node); + st->flags |= RDOC; + } + #ifdef RYML_DBG + _c4dbgpf("resetting state: final flags={}", detail::_parser_flags_to_str(flagbuf, st->flags)); + #endif + } -void Tree::_copy(Tree const& that) -{ - _RYML_CB_ASSERT(m_callbacks, m_buf == nullptr); - _RYML_CB_ASSERT(m_callbacks, m_arena.str == nullptr); - _RYML_CB_ASSERT(m_callbacks, m_arena.len == 0); - m_buf = _RYML_CB_ALLOC_HINT(m_callbacks, NodeData, that.m_cap, that.m_buf); - memcpy(m_buf, that.m_buf, that.m_cap * sizeof(NodeData)); - m_cap = that.m_cap; - m_size = that.m_size; - m_free_head = that.m_free_head; - m_free_tail = that.m_free_tail; - m_arena_pos = that.m_arena_pos; - m_arena = that.m_arena; - if(that.m_arena.str) + /** push a new parent, add a child to the new parent, and set the + * child as the current node */ + void _push() { - _RYML_CB_ASSERT(m_callbacks, that.m_arena.len > 0); - substr arena; - arena.str = _RYML_CB_ALLOC_HINT(m_callbacks, char, that.m_arena.len, that.m_arena.str); - arena.len = that.m_arena.len; - 
_relocate(arena); // does a memcpy of the arena and updates nodes using the old arena - m_arena = arena; + _stack_push(); + NodeData const* prev = m_tree->m_buf; // watch out against relocation of the tree nodes + m_curr->node_id = m_tree->_append_child__unprotected(m_parent->node_id); + m_curr->tr_data = m_tree->_p(m_curr->node_id); + if(prev != m_tree->m_buf) + _refresh_after_relocation(); + _c4dbgpf("pushed! level={}. top is now node={} (parent={})", m_curr->level, m_curr->node_id, m_parent ? m_parent->node_id : NONE); + } + /** end the current scope */ + void _pop() + { + _remove_speculative_with_parent(); + _stack_pop(); } - for(size_t i = 0; i < RYML_MAX_TAG_DIRECTIVES; ++i) - m_tag_directives[i] = that.m_tag_directives[i]; -} -void Tree::_move(Tree & that) -{ - _RYML_CB_ASSERT(m_callbacks, m_buf == nullptr); - _RYML_CB_ASSERT(m_callbacks, m_arena.str == nullptr); - _RYML_CB_ASSERT(m_callbacks, m_arena.len == 0); - m_buf = that.m_buf; - m_cap = that.m_cap; - m_size = that.m_size; - m_free_head = that.m_free_head; - m_free_tail = that.m_free_tail; - m_arena = that.m_arena; - m_arena_pos = that.m_arena_pos; - for(size_t i = 0; i < RYML_MAX_TAG_DIRECTIVES; ++i) - m_tag_directives[i] = that.m_tag_directives[i]; - that._clear(); -} +public: -void Tree::_relocate(substr next_arena) -{ - _RYML_CB_ASSERT(m_callbacks, next_arena.not_empty()); - _RYML_CB_ASSERT(m_callbacks, next_arena.len >= m_arena.len); - memcpy(next_arena.str, m_arena.str, m_arena_pos); - for(NodeData *C4_RESTRICT n = m_buf, *e = m_buf + m_cap; n != e; ++n) + template C4_HOT C4_ALWAYS_INLINE void _enable__() noexcept { - if(in_arena(n->m_key.scalar)) - n->m_key.scalar = _relocated(n->m_key.scalar, next_arena); - if(in_arena(n->m_key.tag)) - n->m_key.tag = _relocated(n->m_key.tag, next_arena); - if(in_arena(n->m_key.anchor)) - n->m_key.anchor = _relocated(n->m_key.anchor, next_arena); - if(in_arena(n->m_val.scalar)) - n->m_val.scalar = _relocated(n->m_val.scalar, next_arena); - 
if(in_arena(n->m_val.tag)) - n->m_val.tag = _relocated(n->m_val.tag, next_arena); - if(in_arena(n->m_val.anchor)) - n->m_val.anchor = _relocated(n->m_val.anchor, next_arena); + m_curr->tr_data->m_type.type = static_cast(m_curr->tr_data->m_type.type | bits); } - for(TagDirective &C4_RESTRICT td : m_tag_directives) + template C4_HOT C4_ALWAYS_INLINE void _disable__() noexcept { - if(in_arena(td.prefix)) - td.prefix = _relocated(td.prefix, next_arena); - if(in_arena(td.handle)) - td.handle = _relocated(td.handle, next_arena); + m_curr->tr_data->m_type.type = static_cast(m_curr->tr_data->m_type.type & (~bits)); + } + template C4_HOT C4_ALWAYS_INLINE bool _has_any__() const noexcept + { + return (m_curr->tr_data->m_type.type & bits) != 0; } -} +public: -//----------------------------------------------------------------------------- -void Tree::reserve(size_t cap) -{ - if(cap > m_cap) + C4_ALWAYS_INLINE void _set_state_(state *C4_RESTRICT s, id_type id) noexcept { - NodeData *buf = _RYML_CB_ALLOC_HINT(m_callbacks, NodeData, cap, m_buf); - if(m_buf) - { - memcpy(buf, m_buf, m_cap * sizeof(NodeData)); - _RYML_CB_FREE(m_callbacks, m_buf, NodeData, m_cap); - } - size_t first = m_cap, del = cap - m_cap; - m_cap = cap; - m_buf = buf; - _clear_range(first, del); - if(m_free_head != NONE) - { - _RYML_CB_ASSERT(m_callbacks, m_buf != nullptr); - _RYML_CB_ASSERT(m_callbacks, m_free_tail != NONE); - m_buf[m_free_tail].m_next_sibling = first; - m_buf[first].m_prev_sibling = m_free_tail; - m_free_tail = cap-1; - } - else - { - _RYML_CB_ASSERT(m_callbacks, m_free_tail == NONE); - m_free_head = first; - m_free_tail = cap-1; - } - _RYML_CB_ASSERT(m_callbacks, m_free_head == NONE || (m_free_head >= 0 && m_free_head < cap)); - _RYML_CB_ASSERT(m_callbacks, m_free_tail == NONE || (m_free_tail >= 0 && m_free_tail < cap)); - - if( ! 
m_size) - _claim_root(); + s->node_id = id; + s->tr_data = m_tree->_p(id); + } + void _refresh_after_relocation() + { + _c4dbgp("tree: refreshing stack data after tree data relocation"); + for(auto &st : m_stack) + st.tr_data = m_tree->_p(st.node_id); } -} - -//----------------------------------------------------------------------------- -void Tree::clear() -{ - _clear_range(0, m_cap); - m_size = 0; - if(m_buf) + void _set_root_as_stream() { - _RYML_CB_ASSERT(m_callbacks, m_cap >= 0); - m_free_head = 0; - m_free_tail = m_cap-1; - _claim_root(); + _c4dbgp("set root as stream"); + _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->root_id() == 0u); + _RYML_CB_ASSERT(m_tree->callbacks(), m_curr->node_id == 0u); + const bool hack = !m_tree->has_children(m_curr->node_id) && !m_tree->is_val(m_curr->node_id); + if(hack) + m_tree->_p(m_tree->root_id())->m_type.add(VAL); + m_tree->set_root_as_stream(); + _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->is_stream(m_tree->root_id())); + _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->has_children(m_tree->root_id())); + _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->is_doc(m_tree->first_child(m_tree->root_id()))); + if(hack) + m_tree->_p(m_tree->first_child(m_tree->root_id()))->m_type.rem(VAL); + _set_state_(m_curr, m_tree->root_id()); } - else + + static NodeData _val2key_(NodeData const& C4_RESTRICT d) noexcept { - m_free_head = NONE; - m_free_tail = NONE; + NodeData r = d; + r.m_key = d.m_val; + r.m_val = {}; + r.m_type = d.m_type; + static_assert((_VALMASK >> 1u) == _KEYMASK, "required for this function to work"); + static_assert((VAL_STYLE >> 1u) == KEY_STYLE, "required for this function to work"); + r.m_type.type = ((d.m_type.type & (_VALMASK|VAL_STYLE)) >> 1u); + r.m_type.type = (r.m_type.type & ~(_VALMASK|VAL_STYLE)); + r.m_type.type = (r.m_type.type | KEY); + return r; } - for(size_t i = 0; i < RYML_MAX_TAG_DIRECTIVES; ++i) - m_tag_directives[i] = {}; -} -void Tree::_claim_root() -{ - size_t r = _claim(); - 
_RYML_CB_ASSERT(m_callbacks, r == 0); - _set_hierarchy(r, NONE, NONE); -} + void _remove_speculative() + { + _c4dbgp("remove speculative node"); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree); + _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->size() > 0); + const id_type last_added = m_tree->size() - 1; + if(m_tree->has_parent(last_added)) + if(m_tree->_p(last_added)->m_type == NOTYPE) + m_tree->remove(last_added); + } + void _remove_speculative_with_parent() + { + _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree); + _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->size() > 0); + const id_type last_added = m_tree->size() - 1; + _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->has_parent(last_added)); + if(m_tree->_p(last_added)->m_type == NOTYPE) + { + _c4dbgpf("remove speculative node with parent. parent={} node={} parent(node)={}", m_parent->node_id, last_added, m_tree->parent(last_added)); + m_tree->remove(last_added); + } + } -//----------------------------------------------------------------------------- -void Tree::_clear_range(size_t first, size_t num) -{ - if(num == 0) - return; // prevent overflow when subtracting - _RYML_CB_ASSERT(m_callbacks, first >= 0 && first + num <= m_cap); - memset(m_buf + first, 0, num * sizeof(NodeData)); // TODO we should not need this - for(size_t i = first, e = first + num; i < e; ++i) + C4_ALWAYS_INLINE void _save_loc() { - _clear(i); - NodeData *n = m_buf + i; - n->m_prev_sibling = i - 1; - n->m_next_sibling = i + 1; + _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree); + _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->_p(m_curr->node_id)->m_val.scalar.len == 0); + m_tree->_p(m_curr->node_id)->m_val.scalar.str = m_curr->line_contents.rem.str; } - m_buf[first + num - 1].m_next_sibling = NONE; -} -C4_SUPPRESS_WARNING_GCC_POP +#undef _enable_ +#undef _disable_ +#undef _has_any_ + /** @endcond */ +}; -//----------------------------------------------------------------------------- -void Tree::_release(size_t i) -{ - _RYML_CB_ASSERT(m_callbacks, i >= 0 && 
i < m_cap); +/** @} */ - _rem_hierarchy(i); - _free_list_add(i); - _clear(i); +} // namespace yml +} // namespace c4 - --m_size; -} +C4_SUPPRESS_WARNING_MSVC_POP -//----------------------------------------------------------------------------- -// add to the front of the free list -void Tree::_free_list_add(size_t i) -{ - _RYML_CB_ASSERT(m_callbacks, i >= 0 && i < m_cap); - NodeData &C4_RESTRICT w = m_buf[i]; +#endif /* _C4_YML_EVENT_HANDLER_TREE_HPP_ */ - w.m_parent = NONE; - w.m_next_sibling = m_free_head; - w.m_prev_sibling = NONE; - if(m_free_head != NONE) - m_buf[m_free_head].m_prev_sibling = i; - m_free_head = i; - if(m_free_tail == NONE) - m_free_tail = m_free_head; -} -void Tree::_free_list_rem(size_t i) -{ - if(m_free_head == i) - m_free_head = _p(i)->m_next_sibling; - _rem_hierarchy(i); -} +// (end https://github.com/biojppm/rapidyaml/src/c4/yml/event_handler_tree.hpp) -//----------------------------------------------------------------------------- -size_t Tree::_claim() -{ - if(m_free_head == NONE || m_buf == nullptr) - { - size_t sz = 2 * m_cap; - sz = sz ? 
sz : 16; - reserve(sz); - _RYML_CB_ASSERT(m_callbacks, m_free_head != NONE); - } - _RYML_CB_ASSERT(m_callbacks, m_size < m_cap); - _RYML_CB_ASSERT(m_callbacks, m_free_head >= 0 && m_free_head < m_cap); - size_t ichild = m_free_head; - NodeData *child = m_buf + ichild; +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/yml/parse_engine.hpp +// https://github.com/biojppm/rapidyaml/src/c4/yml/parse_engine.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** - ++m_size; - m_free_head = child->m_next_sibling; - if(m_free_head == NONE) - { - m_free_tail = NONE; - _RYML_CB_ASSERT(m_callbacks, m_size == m_cap); - } +#ifndef _C4_YML_PARSE_ENGINE_HPP_ +#define _C4_YML_PARSE_ENGINE_HPP_ - _clear(ichild); +#ifndef _C4_YML_DETAIL_PARSER_DBG_HPP_ +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/detail/parser_dbg.hpp +//#include "c4/yml/detail/parser_dbg.hpp" +#if !defined(C4_YML_DETAIL_PARSER_DBG_HPP_) && !defined(_C4_YML_DETAIL_PARSER_DBG_HPP_) +#error "amalgamate: file c4/yml/detail/parser_dbg.hpp must have been included at this point" +#endif /* C4_YML_DETAIL_PARSER_DBG_HPP_ */ - return ichild; -} +#endif -//----------------------------------------------------------------------------- +#ifndef _C4_YML_PARSER_STATE_HPP_ +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/parser_state.hpp +//#include "c4/yml/parser_state.hpp" +#if !defined(C4_YML_PARSER_STATE_HPP_) && !defined(_C4_YML_PARSER_STATE_HPP_) +#error "amalgamate: file c4/yml/parser_state.hpp must have been included at this point" +#endif /* C4_YML_PARSER_STATE_HPP_ */ -C4_SUPPRESS_WARNING_GCC_PUSH -C4_SUPPRESS_WARNING_CLANG_PUSH -C4_SUPPRESS_WARNING_CLANG("-Wnull-dereference") -#if defined(__GNUC__) && 
(__GNUC__ >= 6) -C4_SUPPRESS_WARNING_GCC("-Wnull-dereference") #endif -void Tree::_set_hierarchy(size_t ichild, size_t iparent, size_t iprev_sibling) -{ - _RYML_CB_ASSERT(m_callbacks, iparent == NONE || (iparent >= 0 && iparent < m_cap)); - _RYML_CB_ASSERT(m_callbacks, iprev_sibling == NONE || (iprev_sibling >= 0 && iprev_sibling < m_cap)); - NodeData *C4_RESTRICT child = get(ichild); +#if defined(_MSC_VER) +# pragma warning(push) +# pragma warning(disable: 4251/*needs to have dll-interface to be used by clients of struct*/) +#endif - child->m_parent = iparent; - child->m_prev_sibling = NONE; - child->m_next_sibling = NONE; - if(iparent == NONE) - { - _RYML_CB_ASSERT(m_callbacks, ichild == 0); - _RYML_CB_ASSERT(m_callbacks, iprev_sibling == NONE); - } +namespace c4 { +namespace yml { - if(iparent == NONE) - return; +/** @addtogroup doc_parse + * @{ */ - size_t inext_sibling = iprev_sibling != NONE ? next_sibling(iprev_sibling) : first_child(iparent); - NodeData *C4_RESTRICT parent = get(iparent); - NodeData *C4_RESTRICT psib = get(iprev_sibling); - NodeData *C4_RESTRICT nsib = get(inext_sibling); +/** @defgroup doc_event_handlers Event Handlers + * + * @brief rapidyaml implements its parsing logic with a two-level + * model, where a @ref ParseEngine object reads through the YAML + * source, and dispatches events to an EventHandler bound to the @ref + * ParseEngine. Because @ref ParseEngine is templated on the event + * handler, the binding uses static polymorphism, without any virtual + * functions. The actual handler object can be changed at run time, + * (but of course needs to be the type of the template parameter). + * This is thus a very efficient architecture, and further enables the + * user to provide his own custom handler if he wishes to bypass the + * rapidyaml @ref Tree. 
+ * + * There are two handlers implemented in this project: + * + * - @ref EventHandlerTree is the handler responsible for creating the + * ryml @ref Tree + * + * - @ref EventHandlerYamlStd is the handler responsible for emitting + * standardized [YAML test suite + * events](https://github.com/yaml/yaml-test-suite), used (only) in + * the CI of this project. + * + * + * ### Event model + * + * The event model used by the parse engine and event handlers follows + * very closely the event model in the [YAML test + * suite](https://github.com/yaml/yaml-test-suite). + * + * Consider for example this YAML, + * ```yaml + * {foo: bar,foo2: bar2} + * ``` + * which would produce these events in the test-suite parlance: + * ``` + * +STR + * +DOC + * +MAP {} + * =VAL :foo + * =VAL :bar + * =VAL :foo2 + * =VAL :bar2 + * -MAP + * -DOC + * -STR + * ``` + * + * For reference, the @ref ParseEngine object will produce this + * sequence of calls to its bound EventHandler: + * ```cpp + * handler.begin_stream(); + * handler.begin_doc(); + * handler.begin_map_val_flow(); + * handler.set_key_scalar_plain("foo"); + * handler.set_val_scalar_plain("bar"); + * handler.add_sibling(); + * handler.set_key_scalar_plain("foo2"); + * handler.set_val_scalar_plain("bar2"); + * handler.end_map(); + * handler.end_doc(); + * handler.end_stream(); + * ``` + * + * For many other examples of all areas of YAML and how ryml's parse + * model corresponds to the YAML standard model, refer to the [unit + * tests for the parse + * engine](https://github.com/biojppm/rapidyaml/tree/master/test/test_parse_engine.cpp). + * + * + * ### Special events + * + * Most of the parsing events adopted by rapidyaml in its event model + * are fairly obvious, but there are two less-obvious events requiring + * some explanation. + * + * These events exist to make it easier to parse some special YAML + * cases. 
They are called by the parser when a just-handled + * value/container is actually the first key of a new map: + * + * - `actually_val_is_first_key_of_new_map_flow()` (@ref EventHandlerTree::actually_val_is_first_key_of_new_map_flow() "see implementation in EventHandlerTree" / @ref EventHandlerYamlStd::actually_val_is_first_key_of_new_map_flow() "see implementation in EventHandlerYamlStd") + * - `actually_val_is_first_key_of_new_map_block()` (@ref EventHandlerTree::actually_val_is_first_key_of_new_map_block() "see implementation in EventHandlerTree" / @ref EventHandlerYamlStd::actually_val_is_first_key_of_new_map_block() "see implementation in EventHandlerYamlStd") + * + * For example, consider an implicit map inside a seq: `[a: b, c: + * d]` which is parsed as `[{a: b}, {c: d}]`. The standard event + * sequence for this YAML would be the following: + * ```cpp + * handler.begin_seq_val_flow(); + * handler.begin_map_val_flow(); + * handler.set_key_scalar_plain("a"); + * handler.set_val_scalar_plain("b"); + * handler.end_map(); + * handler.add_sibling(); + * handler.begin_map_val_flow(); + * handler.set_key_scalar_plain("c"); + * handler.set_val_scalar_plain("d"); + * handler.end_map(); + * handler.end_seq(); + * ``` + * The problem with this event sequence is that it forces the + * parser to delay setting the val scalar (in this case "a" and + * "c") until it knows whether the scalar is a key or a val. This + * would require the parser to store the scalar until this + * time. For instance, in the example above, the parser should + * delay setting "a" and "c", because they are in fact keys and + * not vals. Until then, the parser would have to store "a" and + * "c" in its internal state. The downside is that this complexity + * cost would apply even if there is no implicit map -- every val + * in a seq would have to be delayed until one of the + * disambiguating subsequent tokens `,-]:` is found. 
+ * By calling this function, the parser can avoid this complexity, + * by preemptively setting the scalar as a val. Then a call to + * this function will create the map and rearrange the scalar as + * key. Now the cost applies only once: when a seqimap starts. So + * the following (easier and cheaper) event sequence below has the + * same effect as the event sequence above: + * ```cpp + * handler.begin_seq_val_flow(); + * handler.set_val_scalar_plain("notmap"); + * handler.set_val_scalar_plain("a"); // preemptively set "a" as val! + * handler.actually_as_new_map_key(); // create a map, move the "a" val as the key of the first child of the new map + * handler.set_val_scalar_plain("b"); // now "a" is a key and "b" the val + * handler.end_map(); + * handler.set_val_scalar_plain("c"); // "c" also as val! + * handler.actually_as_block_flow(); // likewise + * handler.set_val_scalar_plain("d"); // now "c" is a key and "d" the val + * handler.end_map(); + * handler.end_seq(); + * ``` + * This also applies to container keys (although ryml's tree + * cannot accommodate these): the parser can preemptively set a + * container as a val, and call this event to turn that container + * into a key.
For example, consider this yaml: + * ```yaml + * [aa, bb]: [cc, dd] + * # ^ ^ ^ + * # | | | + * # (2) (1) (3) <- event sequence + * ``` + * The standard event sequence for this YAML would be the + * following: + * ```cpp + * handler.begin_map_val_block(); // (1) + * handler.begin_seq_key_flow(); // (2) + * handler.set_val_scalar_plain("aa"); + * handler.add_sibling(); + * handler.set_val_scalar_plain("bb"); + * handler.end_seq(); + * handler.begin_seq_val_flow(); // (3) + * handler.set_val_scalar_plain("cc"); + * handler.add_sibling(); + * handler.set_val_scalar_plain("dd"); + * handler.end_seq(); + * handler.end_map(); + * ``` + * The problem with the sequence above is that, reading from + * left-to-right, the parser can only detect the proper calls at + * (1) and (2) once it reaches (1) in the YAML source. So, the + * parser would have to buffer the entire event sequence starting + * from the beginning until it reaches (1). Using this function, + * the parser can do instead: + * ```cpp + * handler.begin_seq_val_flow(); // (2) -- preemptively as val! + * handler.set_val_scalar_plain("aa"); + * handler.add_sibling(); + * handler.set_val_scalar_plain("bb"); + * handler.end_seq(); + * handler.actually_as_new_map_key(); // (1) -- adjust when finding that the prev val was actually a key. 
+ * handler.begin_seq_val_flow(); // (3) -- go on as before + * handler.set_val_scalar_plain("cc"); + * handler.add_sibling(); + * handler.set_val_scalar_plain("dd"); + * handler.end_seq(); + * handler.end_map(); + * ``` + */ - if(psib) - { - _RYML_CB_ASSERT(m_callbacks, next_sibling(iprev_sibling) == id(nsib)); - child->m_prev_sibling = id(psib); - psib->m_next_sibling = id(child); - _RYML_CB_ASSERT(m_callbacks, psib->m_prev_sibling != psib->m_next_sibling || psib->m_prev_sibling == NONE); - } +class Tree; +class NodeRef; +class ConstNodeRef; - if(nsib) - { - _RYML_CB_ASSERT(m_callbacks, prev_sibling(inext_sibling) == id(psib)); - child->m_next_sibling = id(nsib); - nsib->m_prev_sibling = id(child); - _RYML_CB_ASSERT(m_callbacks, nsib->m_prev_sibling != nsib->m_next_sibling || nsib->m_prev_sibling == NONE); - } - if(parent->m_first_child == NONE) - { - _RYML_CB_ASSERT(m_callbacks, parent->m_last_child == NONE); - parent->m_first_child = id(child); - parent->m_last_child = id(child); - } - else - { - if(child->m_next_sibling == parent->m_first_child) - parent->m_first_child = id(child); +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- - if(child->m_prev_sibling == parent->m_last_child) - parent->m_last_child = id(child); - } -} +/** Options to give to the parser to control its behavior. 
*/ +struct RYML_EXPORT ParserOptions +{ +private: -C4_SUPPRESS_WARNING_GCC_POP -C4_SUPPRESS_WARNING_CLANG_POP + typedef enum : uint32_t { + SCALAR_FILTERING = (1u << 0), + LOCATIONS = (1u << 1), + DEFAULTS = SCALAR_FILTERING, + } Flags_e; + uint32_t flags = DEFAULTS; -//----------------------------------------------------------------------------- -void Tree::_rem_hierarchy(size_t i) -{ - _RYML_CB_ASSERT(m_callbacks, i >= 0 && i < m_cap); +public: - NodeData &C4_RESTRICT w = m_buf[i]; + ParserOptions() = default; - // remove from the parent - if(w.m_parent != NONE) - { - NodeData &C4_RESTRICT p = m_buf[w.m_parent]; - if(p.m_first_child == i) - { - p.m_first_child = w.m_next_sibling; - } - if(p.m_last_child == i) - { - p.m_last_child = w.m_prev_sibling; - } - } +public: - // remove from the used list - if(w.m_prev_sibling != NONE) - { - NodeData *C4_RESTRICT prev = get(w.m_prev_sibling); - prev->m_next_sibling = w.m_next_sibling; - } - if(w.m_next_sibling != NONE) + /** @name source location tracking */ + /** @{ */ + + /** enable/disable source location tracking */ + ParserOptions& locations(bool enabled) noexcept { - NodeData *C4_RESTRICT next = get(w.m_next_sibling); - next->m_prev_sibling = w.m_prev_sibling; + if(enabled) + flags |= LOCATIONS; + else + flags &= ~LOCATIONS; + return *this; } -} + /** query source location tracking status */ + C4_ALWAYS_INLINE bool locations() const noexcept { return (flags & LOCATIONS); } -//----------------------------------------------------------------------------- -void Tree::reorder() -{ - size_t r = root_id(); - _do_reorder(&r, 0); -} + /** @} */ -//----------------------------------------------------------------------------- -size_t Tree::_do_reorder(size_t *node, size_t count) -{ - // swap this node if it's not in place - if(*node != count) - { - _swap(*node, count); - *node = count; - } - ++count; // bump the count from this node +public: - // now descend in the hierarchy - for(size_t i = first_child(*node); i != NONE; i = 
next_sibling(i)) - { - // this child may have been relocated to a different index, - // so get an updated version - count = _do_reorder(&i, count); - } - return count; -} + /** @name scalar filtering status (experimental; disable at your discretion) */ + /** @{ */ -//----------------------------------------------------------------------------- -void Tree::_swap(size_t n_, size_t m_) -{ - _RYML_CB_ASSERT(m_callbacks, (parent(n_) != NONE) || type(n_) == NOTYPE); - _RYML_CB_ASSERT(m_callbacks, (parent(m_) != NONE) || type(m_) == NOTYPE); - NodeType tn = type(n_); - NodeType tm = type(m_); - if(tn != NOTYPE && tm != NOTYPE) - { - _swap_props(n_, m_); - _swap_hierarchy(n_, m_); - } - else if(tn == NOTYPE && tm != NOTYPE) - { - _copy_props(n_, m_); - _free_list_rem(n_); - _copy_hierarchy(n_, m_); - _clear(m_); - _free_list_add(m_); - } - else if(tn != NOTYPE && tm == NOTYPE) - { - _copy_props(m_, n_); - _free_list_rem(m_); - _copy_hierarchy(m_, n_); - _clear(n_); - _free_list_add(n_); - } - else + /** enable/disable scalar filtering while parsing */ + ParserOptions& scalar_filtering(bool enabled) noexcept { - C4_NEVER_REACH(); + if(enabled) + flags |= SCALAR_FILTERING; + else + flags &= ~SCALAR_FILTERING; + return *this; } -} + /** query scalar filtering status */ + C4_ALWAYS_INLINE bool scalar_filtering() const noexcept { return (flags & SCALAR_FILTERING); } + + /** @} */ +}; + +//----------------------------------------------------------------------------- //----------------------------------------------------------------------------- -void Tree::_swap_hierarchy(size_t ia, size_t ib) +//----------------------------------------------------------------------------- + +/** This is the main driver of parsing logic: it scans the YAML or + * JSON source for tokens, and emits the appropriate sequence of + * parsing events to its event handler. 
The parse engine itself has no + * special limitations, and *can* accommodate containers as keys; it is the + * event handler that may introduce additional constraints. + * + * There are two implemented handlers (see @ref doc_event_handlers, + * which has important notes about the event model): + * + * - @ref EventHandlerTree is the handler responsible for creating the + * ryml @ref Tree + * + * - @ref EventHandlerYamlStd is the handler responsible for emitting + * standardized [YAML test suite + * events](https://github.com/yaml/yaml-test-suite), used (only) in + * the CI of this project. This is not part of the library and is + * not installed. + */ +template +class ParseEngine { - if(ia == ib) return; +public: + + using handler_type = EventHandler; + +public: + + /** @name construction and assignment */ + /** @{ */ + + ParseEngine(EventHandler *evt_handler, ParserOptions opts={}); + ~ParseEngine(); + + ParseEngine(ParseEngine &&); + ParseEngine(ParseEngine const&); + ParseEngine& operator=(ParseEngine &&); + ParseEngine& operator=(ParseEngine const&); + + /** @} */ + +public: + + /** @name modifiers */ + /** @{ */ - for(size_t i = first_child(ia); i != NONE; i = next_sibling(i)) + /** Reserve a certain capacity for the parsing stack. + * This should be larger than the expected depth of the parsed + * YAML tree. + * + * The parsing stack is the only (potential) heap memory used + * directly by the parser. + * + * If the requested capacity is below the default + * stack size of 16, the memory is used directly in the parser + * object; otherwise it will be allocated from the heap. + * + * @note this reserves memory only for the parser itself; all the + * allocations for the parsed tree will go through the tree's + * allocator (when different). + * + * @note for maximum efficiency, the tree and the arena can (and + * should) also be reserved.
*/ + void reserve_stack(id_type capacity) { - if(i == ib || i == ia) - continue; - _p(i)->m_parent = ib; + m_evt_handler->m_stack.reserve(capacity); } - for(size_t i = first_child(ib); i != NONE; i = next_sibling(i)) + /** Reserve a certain capacity for the array used to track node + * locations in the source buffer. */ + void reserve_locations(size_t num_source_lines) { - if(i == ib || i == ia) - continue; - _p(i)->m_parent = ia; + _resize_locations(num_source_lines); } - auto & C4_RESTRICT a = *_p(ia); - auto & C4_RESTRICT b = *_p(ib); - auto & C4_RESTRICT pa = *_p(a.m_parent); - auto & C4_RESTRICT pb = *_p(b.m_parent); + RYML_DEPRECATED("filter arena no longer needed") + void reserve_filter_arena(size_t) {} - if(&pa == &pb) - { - if((pa.m_first_child == ib && pa.m_last_child == ia) - || - (pa.m_first_child == ia && pa.m_last_child == ib)) - { - std::swap(pa.m_first_child, pa.m_last_child); - } - else - { - bool changed = false; - if(pa.m_first_child == ia) - { - pa.m_first_child = ib; - changed = true; - } - if(pa.m_last_child == ia) - { - pa.m_last_child = ib; - changed = true; - } - if(pb.m_first_child == ib && !changed) - { - pb.m_first_child = ia; - } - if(pb.m_last_child == ib && !changed) - { - pb.m_last_child = ia; - } - } - } - else - { - if(pa.m_first_child == ia) - pa.m_first_child = ib; - if(pa.m_last_child == ia) - pa.m_last_child = ib; + /** @} */ + +public: + + /** @name getters */ + /** @{ */ + + /** Get the options used to build this parser object. */ + ParserOptions const& options() const { return m_options; } + + /** Get the current callbacks in the parser. */ + Callbacks const& callbacks() const { RYML_ASSERT(m_evt_handler); return m_evt_handler->m_stack.m_callbacks; } + + /** Get the name of the latest file parsed by this object. */ + csubstr filename() const { return m_file; } + + /** Get the latest YAML buffer parsed by this object. 
*/ + csubstr source() const { return m_buf; } + + id_type stack_capacity() const { RYML_ASSERT(m_evt_handler); return m_evt_handler->m_stack.capacity(); } + size_t locations_capacity() const { return m_newline_offsets_capacity; } + + RYML_DEPRECATED("filter arena no longer needed") + size_t filter_arena_capacity() const { return 0u; } + + /** @} */ + +public: + + /** @name parse methods */ + /** @{ */ + + /** parse YAML in place, emitting events to the current handler */ + void parse_in_place_ev(csubstr filename, substr src); + + /** parse JSON in place, emitting events to the current handler */ + void parse_json_in_place_ev(csubstr filename, substr src); + + /** @} */ + +public: + + /** @name deprecated parse methods + * @{ */ + + /** @cond dev */ + template RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if::type parse_in_place(csubstr filename, substr yaml, Tree *t, size_t node_id); + template RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if::type parse_in_place( substr yaml, Tree *t, size_t node_id); + template RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if::type parse_in_place(csubstr filename, substr yaml, Tree *t ); + template RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if::type parse_in_place( substr yaml, Tree *t ); + template RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if::type parse_in_place(csubstr filename, substr yaml, NodeRef node ); + template RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if::type parse_in_place( substr yaml, NodeRef node ); + template RYML_DEPRECATED("removed, deliberately undefined. 
use the freestanding function in parse.hpp.") typename std::enable_if::type parse_in_place(csubstr filename, substr yaml ); + template RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if::type parse_in_place( substr yaml ); + template RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if::type parse_in_arena(csubstr filename, csubstr yaml, Tree *t, size_t node_id); + template RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if::type parse_in_arena( csubstr yaml, Tree *t, size_t node_id); + template RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if::type parse_in_arena(csubstr filename, csubstr yaml, Tree *t ); + template RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if::type parse_in_arena( csubstr yaml, Tree *t ); + template RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if::type parse_in_arena(csubstr filename, csubstr yaml, NodeRef node ); + template RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if::type parse_in_arena( csubstr yaml, NodeRef node ); + template RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if::type parse_in_arena(csubstr filename, csubstr yaml ); + template RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if::type parse_in_arena( csubstr yaml ); + template RYML_DEPRECATED("removed, deliberately undefined. 
use the freestanding csubstr version in parse.hpp.") typename std::enable_if::type parse_in_arena(csubstr filename, substr yaml, Tree *t, size_t node_id); + template RYML_DEPRECATED("removed, deliberately undefined. use the freestanding csubstr version in parse.hpp.") typename std::enable_if::type parse_in_arena( substr yaml, Tree *t, size_t node_id); + template RYML_DEPRECATED("removed, deliberately undefined. use the freestanding csubstr version in parse.hpp.") typename std::enable_if::type parse_in_arena(csubstr filename, substr yaml, Tree *t ); + template RYML_DEPRECATED("removed, deliberately undefined. use the freestanding csubstr version in parse.hpp.") typename std::enable_if::type parse_in_arena( substr yaml, Tree *t ); + template RYML_DEPRECATED("removed, deliberately undefined. use the freestanding csubstr version in parse.hpp.") typename std::enable_if::type parse_in_arena(csubstr filename, substr yaml, NodeRef node ); + template RYML_DEPRECATED("removed, deliberately undefined. use the freestanding csubstr version in parse.hpp.") typename std::enable_if::type parse_in_arena( substr yaml, NodeRef node ); + template RYML_DEPRECATED("removed, deliberately undefined. use the freestanding csubstr version in parse.hpp.") typename std::enable_if::type parse_in_arena(csubstr filename, substr yaml ); + template RYML_DEPRECATED("removed, deliberately undefined. use the freestanding csubstr version in parse.hpp.") typename std::enable_if::type parse_in_arena( substr yaml ); + /** @endcond */ + + /** @} */ + +public: + + /** @name locations */ + /** @{ */ + + /** Get the location of a node of the last tree to be parsed by this parser. */ + Location location(Tree const& tree, id_type node_id) const; + /** Get the location of a node of the last tree to be parsed by this parser. */ + Location location(ConstNodeRef node) const; + /** Get the string starting at a particular location, to the end + * of the parsed source buffer. 
*/ + csubstr location_contents(Location const& loc) const; + /** Given a pointer to a buffer position, get the location. + * @param[in] val must be pointing to somewhere in the source + * buffer that was last parsed by this object. */ + Location val_location(const char *val) const; + + /** @} */ + +public: + + /** @name scalar filtering */ + /** @{*/ + + /** filter a plain scalar */ + FilterResult filter_scalar_plain(csubstr scalar, substr dst, size_t indentation); + /** filter a plain scalar in place */ + FilterResult filter_scalar_plain_in_place(substr scalar, size_t cap, size_t indentation); + + /** filter a single-quoted scalar */ + FilterResult filter_scalar_squoted(csubstr scalar, substr dst); + /** filter a single-quoted scalar in place */ + FilterResult filter_scalar_squoted_in_place(substr scalar, size_t cap); + + /** filter a double-quoted scalar */ + FilterResult filter_scalar_dquoted(csubstr scalar, substr dst); + /** filter a double-quoted scalar in place */ + FilterResultExtending filter_scalar_dquoted_in_place(substr scalar, size_t cap); + + /** filter a block-literal scalar */ + FilterResult filter_scalar_block_literal(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp); + /** filter a block-literal scalar in place */ + FilterResult filter_scalar_block_literal_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp); + + /** filter a block-folded scalar */ + FilterResult filter_scalar_block_folded(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp); + /** filter a block-folded scalar in place */ + FilterResult filter_scalar_block_folded_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp); + + /** @} */ + +private: + + struct ScannedScalar + { + substr scalar; + bool needs_filter; + }; + + struct ScannedBlock + { + substr scalar; + size_t indentation; + BlockChomp_e chomp; + }; + + bool _is_doc_begin(csubstr s); + bool _is_doc_end(csubstr s); + + bool 
_scan_scalar_plain_blck(ScannedScalar *C4_RESTRICT sc, size_t indentation); + bool _scan_scalar_plain_seq_flow(ScannedScalar *C4_RESTRICT sc); + bool _scan_scalar_plain_seq_blck(ScannedScalar *C4_RESTRICT sc); + bool _scan_scalar_plain_map_flow(ScannedScalar *C4_RESTRICT sc); + bool _scan_scalar_plain_map_blck(ScannedScalar *C4_RESTRICT sc); + bool _scan_scalar_map_json(ScannedScalar *C4_RESTRICT sc); + bool _scan_scalar_seq_json(ScannedScalar *C4_RESTRICT sc); + bool _scan_scalar_plain_unk(ScannedScalar *C4_RESTRICT sc); + bool _is_valid_start_scalar_plain_flow(csubstr s); + + ScannedScalar _scan_scalar_squot(); + ScannedScalar _scan_scalar_dquot(); + + void _scan_block(ScannedBlock *C4_RESTRICT sb, size_t indref); + + csubstr _scan_anchor(); + csubstr _scan_ref_seq(); + csubstr _scan_ref_map(); + csubstr _scan_tag(); + +public: // exposed for testing + + /** @cond dev */ + csubstr _filter_scalar_plain(substr s, size_t indentation); + csubstr _filter_scalar_squot(substr s); + csubstr _filter_scalar_dquot(substr s); + csubstr _filter_scalar_literal(substr s, size_t indentation, BlockChomp_e chomp); + csubstr _filter_scalar_folded(substr s, size_t indentation, BlockChomp_e chomp); + + csubstr _maybe_filter_key_scalar_plain(ScannedScalar const& sc, size_t indendation); + csubstr _maybe_filter_val_scalar_plain(ScannedScalar const& sc, size_t indendation); + csubstr _maybe_filter_key_scalar_squot(ScannedScalar const& sc); + csubstr _maybe_filter_val_scalar_squot(ScannedScalar const& sc); + csubstr _maybe_filter_key_scalar_dquot(ScannedScalar const& sc); + csubstr _maybe_filter_val_scalar_dquot(ScannedScalar const& sc); + csubstr _maybe_filter_key_scalar_literal(ScannedBlock const& sb); + csubstr _maybe_filter_val_scalar_literal(ScannedBlock const& sb); + csubstr _maybe_filter_key_scalar_folded(ScannedBlock const& sb); + csubstr _maybe_filter_val_scalar_folded(ScannedBlock const& sb); + /** @endcond */ + +private: + + void _handle_map_block(); + void 
_handle_seq_block(); + void _handle_map_flow(); + void _handle_seq_flow(); + void _handle_seq_imap(); + void _handle_map_json(); + void _handle_seq_json(); + + void _handle_unk(); + void _handle_unk_json(); + void _handle_usty(); + + void _handle_flow_skip_whitespace(); + + void _end_map_blck(); + void _end_seq_blck(); + void _end2_map(); + void _end2_seq(); + + void _begin2_doc(); + void _begin2_doc_expl(); + void _end2_doc(); + void _end2_doc_expl(); + + void _maybe_begin_doc(); + void _maybe_end_doc(); + + void _start_doc_suddenly(); + void _end_doc_suddenly(); + void _end_doc_suddenly__pop(); + void _end_stream(); + + void _set_indentation(size_t indentation); + void _save_indentation(); + void _handle_indentation_pop_from_block_seq(); + void _handle_indentation_pop_from_block_map(); + void _handle_indentation_pop(ParserState const* dst); + + void _maybe_skip_comment(); + void _skip_comment(); + void _maybe_skip_whitespace_tokens(); + void _maybe_skipchars(char c); + #ifdef RYML_NO_COVERAGE__TO_BE_DELETED + void _maybe_skipchars_up_to(char c, size_t max_to_skip); + #endif + template + void _skipchars(const char (&chars)[N]); + bool _maybe_scan_following_colon() noexcept; + bool _maybe_scan_following_comma() noexcept; + +public: + + /** @cond dev */ + template auto _filter_plain(FilterProcessor &C4_RESTRICT proc, size_t indentation) -> decltype(proc.result()); + template auto _filter_squoted(FilterProcessor &C4_RESTRICT proc) -> decltype(proc.result()); + template auto _filter_dquoted(FilterProcessor &C4_RESTRICT proc) -> decltype(proc.result()); + template auto _filter_block_literal(FilterProcessor &C4_RESTRICT proc, size_t indentation, BlockChomp_e chomp) -> decltype(proc.result()); + template auto _filter_block_folded(FilterProcessor &C4_RESTRICT proc, size_t indentation, BlockChomp_e chomp) -> decltype(proc.result()); + /** @endcond */ + +public: + + /** @cond dev */ + template void _filter_nl_plain(FilterProcessor &C4_RESTRICT proc, size_t indentation); + 
template void _filter_nl_squoted(FilterProcessor &C4_RESTRICT proc); + template void _filter_nl_dquoted(FilterProcessor &C4_RESTRICT proc); + + template bool _filter_ws_handle_to_first_non_space(FilterProcessor &C4_RESTRICT proc); + template void _filter_ws_copy_trailing(FilterProcessor &C4_RESTRICT proc); + template void _filter_ws_skip_trailing(FilterProcessor &C4_RESTRICT proc); + + template void _filter_dquoted_backslash(FilterProcessor &C4_RESTRICT proc); + + template void _filter_chomp(FilterProcessor &C4_RESTRICT proc, BlockChomp_e chomp, size_t indentation); + template size_t _handle_all_whitespace(FilterProcessor &C4_RESTRICT proc, BlockChomp_e chomp); + template size_t _extend_to_chomp(FilterProcessor &C4_RESTRICT proc, size_t contents_len); + template void _filter_block_indentation(FilterProcessor &C4_RESTRICT proc, size_t indentation); + template void _filter_block_folded_newlines(FilterProcessor &C4_RESTRICT proc, size_t indentation, size_t len); + template size_t _filter_block_folded_newlines_compress(FilterProcessor &C4_RESTRICT proc, size_t num_newl, size_t wpos_at_first_newl); + template void _filter_block_folded_newlines_leading(FilterProcessor &C4_RESTRICT proc, size_t indentation, size_t len); + template void _filter_block_folded_indented_block(FilterProcessor &C4_RESTRICT proc, size_t indentation, size_t len, size_t curr_indentation) noexcept; + + /** @endcond */ + +private: + + void _line_progressed(size_t ahead); + void _line_ended(); + void _line_ended_undo(); + + bool _finished_file() const; + bool _finished_line() const; + + void _scan_line(); + substr _peek_next_line(size_t pos=npos) const; + + inline bool _at_line_begin() const + { + return m_evt_handler->m_curr->line_contents.rem.begin() == m_evt_handler->m_curr->line_contents.full.begin(); + } + + void _relocate_arena(csubstr prev_arena, substr next_arena); + static void _s_relocate_arena(void*, csubstr prev_arena, substr next_arena); + +private: + + C4_ALWAYS_INLINE bool 
has_all(ParserFlag_t f) const noexcept { return (m_evt_handler->m_curr->flags & f) == f; } + C4_ALWAYS_INLINE bool has_any(ParserFlag_t f) const noexcept { return (m_evt_handler->m_curr->flags & f) != 0; } + C4_ALWAYS_INLINE bool has_none(ParserFlag_t f) const noexcept { return (m_evt_handler->m_curr->flags & f) == 0; } + static C4_ALWAYS_INLINE bool has_all(ParserFlag_t f, ParserState const* C4_RESTRICT s) noexcept { return (s->flags & f) == f; } + static C4_ALWAYS_INLINE bool has_any(ParserFlag_t f, ParserState const* C4_RESTRICT s) noexcept { return (s->flags & f) != 0; } + static C4_ALWAYS_INLINE bool has_none(ParserFlag_t f, ParserState const* C4_RESTRICT s) noexcept { return (s->flags & f) == 0; } + + #ifndef RYML_DBG + C4_ALWAYS_INLINE static void add_flags(ParserFlag_t on, ParserState *C4_RESTRICT s) noexcept { s->flags |= on; } + C4_ALWAYS_INLINE static void addrem_flags(ParserFlag_t on, ParserFlag_t off, ParserState *C4_RESTRICT s) noexcept { s->flags &= ~off; s->flags |= on; } + C4_ALWAYS_INLINE static void rem_flags(ParserFlag_t off, ParserState *C4_RESTRICT s) noexcept { s->flags &= ~off; } + C4_ALWAYS_INLINE void add_flags(ParserFlag_t on) noexcept { m_evt_handler->m_curr->flags |= on; } + C4_ALWAYS_INLINE void addrem_flags(ParserFlag_t on, ParserFlag_t off) noexcept { m_evt_handler->m_curr->flags &= ~off; m_evt_handler->m_curr->flags |= on; } + C4_ALWAYS_INLINE void rem_flags(ParserFlag_t off) noexcept { m_evt_handler->m_curr->flags &= ~off; } + #else + static void add_flags(ParserFlag_t on, ParserState *C4_RESTRICT s); + static void addrem_flags(ParserFlag_t on, ParserFlag_t off, ParserState *C4_RESTRICT s); + static void rem_flags(ParserFlag_t off, ParserState *C4_RESTRICT s); + C4_ALWAYS_INLINE void add_flags(ParserFlag_t on) noexcept { add_flags(on, m_evt_handler->m_curr); } + C4_ALWAYS_INLINE void addrem_flags(ParserFlag_t on, ParserFlag_t off) noexcept { addrem_flags(on, off, m_evt_handler->m_curr); } + C4_ALWAYS_INLINE void 
rem_flags(ParserFlag_t off) noexcept { rem_flags(off, m_evt_handler->m_curr); } + #endif + +private: + + void _prepare_locations(); + void _resize_locations(size_t sz); + bool _locations_dirty() const; + + bool _location_from_cont(Tree const& tree, id_type node, Location *C4_RESTRICT loc) const; + bool _location_from_node(Tree const& tree, id_type node, Location *C4_RESTRICT loc, id_type level) const; + +private: + + void _reset(); + void _free(); + void _clr(); + + #ifdef RYML_DBG + template void _dbg(csubstr fmt, Args const& C4_RESTRICT ...args) const; + #endif + template void _err(csubstr fmt, Args const& C4_RESTRICT ...args) const; + template void _errloc(csubstr fmt, Location const& loc, Args const& C4_RESTRICT ...args) const; + + template void _fmt_msg(DumpFn &&dumpfn) const; + +private: + + /** store pending tag or anchor/ref annotations */ + struct Annotation + { + struct Entry + { + csubstr str; + size_t indentation; + size_t line; + }; + Entry annotations[2]; + size_t num_entries; + }; + + void _add_annotation(Annotation *C4_RESTRICT dst, csubstr str, size_t indentation, size_t line); + void _clear_annotations(Annotation *C4_RESTRICT dst); + bool _has_pending_annotations() const { return m_pending_tags.num_entries || m_pending_anchors.num_entries; } + #ifdef RYML_NO_COVERAGE__TO_BE_DELETED + bool _handle_indentation_from_annotations(); + #endif + bool _annotations_require_key_container() const; + void _handle_annotations_before_blck_key_scalar(); + void _handle_annotations_before_blck_val_scalar(); + void _handle_annotations_before_start_mapblck(size_t current_line); + void _handle_annotations_before_start_mapblck_as_key(); + void _handle_annotations_and_indentation_after_start_mapblck(size_t key_indentation, size_t key_line); + size_t _select_indentation_from_annotations(size_t val_indentation, size_t val_line); + void _handle_directive(csubstr rem); + + void _check_tag(csubstr tag); + +private: + + ParserOptions m_options; + + csubstr m_file; + substr 
m_buf; + +public: + + /** @cond dev */ + EventHandler *C4_RESTRICT m_evt_handler; + /** @endcond */ + +private: + + Annotation m_pending_anchors; + Annotation m_pending_tags; + + bool m_was_inside_qmrk; + bool m_doc_empty = true; + +private: + + size_t *m_newline_offsets; + size_t m_newline_offsets_size; + size_t m_newline_offsets_capacity; + csubstr m_newline_offsets_buf; + +}; + +/** @cond dev */ +RYML_EXPORT C4_NO_INLINE size_t _find_last_newline_and_larger_indentation(csubstr s, size_t indentation) noexcept; +/** @endcond */ + + +/** Quickly inspect the source to estimate the number of nodes the + * resulting tree is likely to have. If a tree is empty before + * parsing, considerable time will be spent growing it, so calling + * this to reserve the tree size prior to parsing is likely to + * result in a time gain. We encourage using this method before + * parsing, but as always measure its impact in performance to + * obtain a good trade-off. + * + * @note since this method is meant for optimizing performance, it + * is approximate. The result may actually be smaller than the + * resulting number of nodes, notably if the YAML uses implicit + * maps as flow seq members as in `[these: are, individual: + * maps]`. 
*/ +RYML_EXPORT id_type estimate_tree_capacity(csubstr src); + +/** @} */ + +} // namespace yml +} // namespace c4 + +#if defined(_MSC_VER) +# pragma warning(pop) +#endif + +#endif /* _C4_YML_PARSE_ENGINE_HPP_ */ + + +// (end https://github.com/biojppm/rapidyaml/src/c4/yml/parse_engine.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/yml/preprocess.hpp +// https://github.com/biojppm/rapidyaml/src/c4/yml/preprocess.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4_YML_PREPROCESS_HPP_ +#define _C4_YML_PREPROCESS_HPP_ + +/** @file preprocess.hpp Functions for preprocessing YAML prior to parsing. */ + +#ifndef _C4_YML_COMMON_HPP_ +//included above: +//#include "./common.hpp" +#endif +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/substr.hpp +//#include +#if !defined(C4_SUBSTR_HPP_) && !defined(_C4_SUBSTR_HPP_) +#error "amalgamate: file c4/substr.hpp must have been included at this point" +#endif /* C4_SUBSTR_HPP_ */ + + + +namespace c4 { +namespace yml { + +/** @addtogroup doc_preprocessors + * @{ + */ + +/** @cond dev */ +namespace detail { +using Preprocessor = size_t(csubstr, substr); +template +substr preprocess_into_container(csubstr input, CharContainer *out) +{ + // try to write once. the preprocessor will stop writing at the end of + // the container, but will process all the input to determine the + // required container size. 
+ size_t sz = PP(input, to_substr(*out)); + // if the container size is not enough, resize, and run again in the + // resized container + if(sz > out->size()) + { + out->resize(sz); + sz = PP(input, to_substr(*out)); + } + return to_substr(*out).first(sz); +} +} // namespace detail +/** @endcond */ + + +//----------------------------------------------------------------------------- + +/** @defgroup doc_preprocess_rxmap preprocess_rxmap + * + * @brief Convert flow-type relaxed maps (with implicit bools) into strict YAML + * flow map: + * + * @code{.yaml} + * {a, b, c, d: [e, f], g: {a, b}} + * # is converted into this: + * {a: 1, b: 1, c: 1, d: [e, f], g: {a, b}} + * @endcode + + * @note this is NOT recursive - conversion happens only in the top-level map + * @param rxmap A relaxed map + * @param buf output buffer + * @param out output container + * + * @{ + */ + +/** Write into a given output buffer. This function is safe to call with + * empty or small buffers; it won't write beyond the end of the buffer. + * + * @return the number of characters required for output + */ +RYML_EXPORT size_t preprocess_rxmap(csubstr rxmap, substr buf); + + +/** Write into an existing container. It is resized to contain the output. + * @return a substr of the container + * @overload preprocess_rxmap */ +template +substr preprocess_rxmap(csubstr rxmap, CharContainer *out) +{ + return detail::preprocess_into_container(rxmap, out); +} + + +/** Create a container with the result. 
+ * @overload preprocess_rxmap */ +template +CharContainer preprocess_rxmap(csubstr rxmap) +{ + CharContainer out; + preprocess_rxmap(rxmap, &out); + return out; +} + +/** @} */ // preprocess_rxmap +/** @} */ // group + +} // namespace yml +} // namespace c4 + +#endif /* _C4_YML_PREPROCESS_HPP_ */ + + +// (end https://github.com/biojppm/rapidyaml/src/c4/yml/preprocess.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/yml/reference_resolver.hpp +// https://github.com/biojppm/rapidyaml/src/c4/yml/reference_resolver.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4_YML_REFERENCE_RESOLVER_HPP_ +#define _C4_YML_REFERENCE_RESOLVER_HPP_ + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/tree.hpp +//#include "c4/yml/tree.hpp" +#if !defined(C4_YML_TREE_HPP_) && !defined(_C4_YML_TREE_HPP_) +#error "amalgamate: file c4/yml/tree.hpp must have been included at this point" +#endif /* C4_YML_TREE_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/detail/stack.hpp +//#include "c4/yml/detail/stack.hpp" +#if !defined(C4_YML_DETAIL_STACK_HPP_) && !defined(_C4_YML_DETAIL_STACK_HPP_) +#error "amalgamate: file c4/yml/detail/stack.hpp must have been included at this point" +#endif /* C4_YML_DETAIL_STACK_HPP_ */ + + + +namespace c4 { +namespace yml { + +/** @addtogroup doc_ref_utils + * @{ + */ + +/** Reusable object to resolve references/aliases in the tree. */ +struct RYML_EXPORT ReferenceResolver +{ + ReferenceResolver() = default; + + /** Resolve references: for each reference, look for a matching + * anchor, and copy its contents to the ref node. 
+ * + * This method first does a full traversal of the tree to gather + * all anchors and references in a separate collection, then it + * goes through that collection to locate the names, which it does + * by obeying the YAML standard diktat that "an alias node refers + * to the most recent node in the serialization having the + * specified anchor" + * + * So, depending on the number of anchor/alias nodes, this is a + * potentially expensive operation, with a best-case linear + * complexity (from the initial traversal). + * + * @todo verify sanity against anchor-ref attacks (https://en.wikipedia.org/wiki/Billion_laughs_attack ) + */ + void resolve(Tree *t_); + +public: + + /** @cond dev */ + + struct RefData + { + NodeType type; + id_type node; + id_type prev_anchor; + id_type target; + id_type parent_ref; + id_type parent_ref_sibling; + }; + + void reset_(Tree *t_); + void gather_anchors_and_refs_(); + void gather_anchors_and_refs__(id_type n); + id_type count_anchors_and_refs_(id_type n); + + id_type lookup_(RefData *C4_RESTRICT ra); + +public: + + Tree *C4_RESTRICT m_tree; + /** We're using this stack purely as an array. 
*/ + detail::stack m_refs; + + /** @endcond */ +}; + +/** @} */ + +} // namespace yml +} // namespace c4 + + +#endif // _C4_YML_REFERENCE_RESOLVER_HPP_ + + +// (end https://github.com/biojppm/rapidyaml/src/c4/yml/reference_resolver.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/yml/parse.hpp +// https://github.com/biojppm/rapidyaml/src/c4/yml/parse.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4_YML_PARSE_HPP_ +#define _C4_YML_PARSE_HPP_ + +#ifndef _C4_YML_COMMON_HPP_ +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/common.hpp +//#include "c4/yml/common.hpp" +#if !defined(C4_YML_COMMON_HPP_) && !defined(_C4_YML_COMMON_HPP_) +#error "amalgamate: file c4/yml/common.hpp must have been included at this point" +#endif /* C4_YML_COMMON_HPP_ */ + +#endif + +namespace c4 { +namespace yml { + +class Tree; +class NodeRef; +template class ParseEngine; +struct EventHandlerTree; +RYML_EXPORT id_type estimate_tree_capacity(csubstr src); + + +/** @addtogroup doc_parse + * @{ */ + +/** This is the main ryml parser, where the parser events are handled + * to create a ryml tree. + * + * @warning Because the ryml @ref Tree does not accept containers as + * keys, this class cannot successfully parse YAML source with this + * feature. See @ref ParseEngine for more details. + * + * @see ParserOptions + * @see ParseEngine + * @see EventHandlerTree + * */ +using Parser = RYML_EXPORT ParseEngine; + + +//----------------------------------------------------------------------------- + +/** @defgroup doc_parse_in_place__with_existing_parser Parse in place with existing parser + * + * @brief parse a mutable YAML source buffer. 
Scalars requiring + * filtering are mutated in place (except in the rare cases where the + * filtered scalar is longer than the original scalar, or where + * filtering was disabled before the call). These overloads accept an + * existing parser object, and provide the opportunity to use special + * parser options. + * + * @see ParserOptions + * + * @{ + */ + +// this is vertically aligned to highlight the parameter differences. + +RYML_EXPORT void parse_in_place(Parser *parser, csubstr filename, substr yaml, Tree *t, id_type node_id); /**< (1) parse YAML into an existing tree node. + * + * The filename will be used in any error messages + * arising during the parse. The callbacks in the + * tree are kept, and used to allocate + * the tree members, if any allocation is required. */ +RYML_EXPORT void parse_in_place(Parser *parser, substr yaml, Tree *t, id_type node_id); /**< (2) like (1) but no filename will be reported */ +RYML_EXPORT void parse_in_place(Parser *parser, csubstr filename, substr yaml, Tree *t ); /**< (3) parse YAML into an existing tree, into its root node. + * + * The filename will be used in any error messages + * arising during the parse. The callbacks in the + * tree are kept, and used to allocate + * the tree members, if any allocation is required. */ +RYML_EXPORT void parse_in_place(Parser *parser, substr yaml, Tree *t ); /**< (4) like (3) but no filename will be reported */ +RYML_EXPORT void parse_in_place(Parser *parser, csubstr filename, substr yaml, NodeRef node ); /**< (5) like (1) but the node is given as a NodeRef */ +RYML_EXPORT void parse_in_place(Parser *parser, substr yaml, NodeRef node ); /**< (6) like (5) but no filename will be reported */ +RYML_EXPORT Tree parse_in_place(Parser *parser, csubstr filename, substr yaml ); /**< (7) create a new tree, and parse YAML into its root node. + * + * The filename will be used in any error messages + * arising during the parse. 
The tree is created with + * the callbacks currently in the parser. + */ +RYML_EXPORT Tree parse_in_place(Parser *parser, substr yaml ); /**< (8) like (7) but no filename will be reported */ + + +// this is vertically aligned to highlight the parameter differences. +RYML_EXPORT void parse_json_in_place(Parser *parser, csubstr filename, substr json, Tree *t, id_type node_id); ///< (1) parse JSON into an existing tree node. The filename will be used in any error messages arising during the parse. +RYML_EXPORT void parse_json_in_place(Parser *parser, substr json, Tree *t, id_type node_id); ///< (2) like (1) but no filename will be reported +RYML_EXPORT void parse_json_in_place(Parser *parser, csubstr filename, substr json, Tree *t ); ///< (3) parse JSON into an existing tree, into its root node. +RYML_EXPORT void parse_json_in_place(Parser *parser, substr json, Tree *t ); ///< (4) like (3) but no filename will be reported +RYML_EXPORT void parse_json_in_place(Parser *parser, csubstr filename, substr json, NodeRef node ); ///< (5) like (1) but the node is given as a NodeRef +RYML_EXPORT void parse_json_in_place(Parser *parser, substr json, NodeRef node ); ///< (6) like (5) but no filename will be reported +RYML_EXPORT Tree parse_json_in_place(Parser *parser, csubstr filename, substr json ); ///< (7) create a new tree, and parse JSON into its root node. +RYML_EXPORT Tree parse_json_in_place(Parser *parser, substr json ); ///< (8) like (7) but no filename will be reported + +/** @} */ + + +//----------------------------------------------------------------------------- + +/** @defgroup doc_parse_in_place___with_temporary_parser Parse in place with temporary parser + * + * @brief parse a mutable YAML source buffer. Scalars requiring + * filtering are mutated in place (except in the rare cases where the + * filtered scalar is longer than the original scalar). 
+ * + * @note These freestanding functions use a temporary parser object, + * and are convenience functions to enable the user to easily parse + * YAML without the need to explicitly instantiate a parser and event + * handler. Note that some properties (notably node locations in the + * original source code) are only available through the parser + * class. If you need access to any of these properties, use + * the appropriate overload from @ref doc_parse_in_place__with_existing_parser + * + * @{ + */ + +// this is vertically aligned to highlight the parameter differences. +RYML_EXPORT void parse_in_place(csubstr filename, substr yaml, Tree *t, id_type node_id); ///< (1) parse YAML into an existing tree node. The filename will be used in any error messages arising during the parse. +RYML_EXPORT void parse_in_place( substr yaml, Tree *t, id_type node_id); ///< (2) like (1) but no filename will be reported +RYML_EXPORT void parse_in_place(csubstr filename, substr yaml, Tree *t ); ///< (3) parse YAML into an existing tree, into its root node. +RYML_EXPORT void parse_in_place( substr yaml, Tree *t ); ///< (4) like (3) but no filename will be reported +RYML_EXPORT void parse_in_place(csubstr filename, substr yaml, NodeRef node ); ///< (5) like (1) but the node is given as a NodeRef +RYML_EXPORT void parse_in_place( substr yaml, NodeRef node ); ///< (6) like (5) but no filename will be reported +RYML_EXPORT Tree parse_in_place(csubstr filename, substr yaml ); ///< (7) create a new tree, and parse YAML into its root node. +RYML_EXPORT Tree parse_in_place( substr yaml ); ///< (8) like (7) but no filename will be reported + +// this is vertically aligned to highlight the parameter differences. +RYML_EXPORT void parse_json_in_place(csubstr filename, substr json, Tree *t, id_type node_id); ///< (1) parse JSON into an existing tree node. The filename will be used in any error messages arising during the parse. 
+RYML_EXPORT void parse_json_in_place( substr json, Tree *t, id_type node_id); ///< (2) like (1) but no filename will be reported +RYML_EXPORT void parse_json_in_place(csubstr filename, substr json, Tree *t ); ///< (3) parse JSON into an existing tree, into its root node. +RYML_EXPORT void parse_json_in_place( substr json, Tree *t ); ///< (4) like (3) but no filename will be reported +RYML_EXPORT void parse_json_in_place(csubstr filename, substr json, NodeRef node ); ///< (5) like (1) but the node is given as a NodeRef +RYML_EXPORT void parse_json_in_place( substr json, NodeRef node ); ///< (6) like (5) but no filename will be reported +RYML_EXPORT Tree parse_json_in_place(csubstr filename, substr json ); ///< (7) create a new tree, and parse JSON into its root node. +RYML_EXPORT Tree parse_json_in_place( substr json ); ///< (8) like (7) but no filename will be reported + +/** @} */ + + +//----------------------------------------------------------------------------- + + +/** @defgroup doc_parse_in_arena__with_existing_parser Parse in arena with existing parser + * + * @brief parse a read-only (immutable) YAML source buffer. This is + * achieved by first copying the contents of the buffer to the tree's + * arena, and then calling @ref parse_in_arena() . All the resulting + * scalars will be filtered in the arena. These overloads accept an + * existing parser object, and provide the opportunity to use special + * parser options. + * + * @see ParserOptions + * + * + * @note These freestanding functions use a temporary parser object, + * and are convenience functions to easily parse YAML without the need + * to instantiate a separate parser. Note that some properties + * (notably node locations in the original source code) are only + * available through the parser class. 
If you need access to any of + * these properties, use the appropriate overload from @ref + * doc_parse_in_arena__with_existing_parser + * + * @warning overloads receiving a substr YAML buffer are intentionally + * left undefined, such that calling parse_in_arena() with a substr + * will cause a linker error. This is to prevent an accidental copy of + * the source buffer to the tree's arena, because substr (which is + * mutable) is implicitly convertible to csubstr (which is + * immutable). If you really intend to parse a mutable buffer in the + * tree's arena, convert it first to immutable by assigning the substr + * to a csubstr prior to calling parse_in_arena(). This is not needed + * for parse_in_place() because csubstr is not implicitly convertible + * to substr. To be clear: + * ```c++ + * substr mutable_buffer = ...; + * parser.parse_in_arena(mutable_buffer); // linker error + * + * csubstr immutable_buffer = ...; + * parser.parse_in_arena(immutable_buffer); // ok + * ``` + * + * @{ + */ + +#define RYML_DONT_PARSE_SUBSTR_IN_ARENA "" \ + "Do not pass a (mutable) substr to parse_in_arena(); " \ + "if you have a substr, it should be parsed in place. " \ + "Consider using parse_in_place() instead, or convert " \ + "the buffer to csubstr prior to calling. This function " \ + " is deliberately left undefined, so that calling it " \ + "will cause a linker error." + +// this is vertically aligned to highlight the parameter differences. +RYML_EXPORT void parse_in_arena(Parser *parser, csubstr filename, csubstr yaml, Tree *t, id_type node_id); ///< (1) parse YAML into an existing tree node. The filename will be used in any error messages arising during the parse. +RYML_EXPORT void parse_in_arena(Parser *parser, csubstr yaml, Tree *t, id_type node_id); ///< (2) like (1) but no filename will be reported +RYML_EXPORT void parse_in_arena(Parser *parser, csubstr filename, csubstr yaml, Tree *t ); ///< (3) parse YAML into an existing tree, into its root node. 
+RYML_EXPORT void parse_in_arena(Parser *parser, csubstr yaml, Tree *t ); ///< (4) like (3) but no filename will be reported +RYML_EXPORT void parse_in_arena(Parser *parser, csubstr filename, csubstr yaml, NodeRef node ); ///< (5) like (1) but the node is given as a NodeRef +RYML_EXPORT void parse_in_arena(Parser *parser, csubstr yaml, NodeRef node ); ///< (6) like (5) but no filename will be reported +RYML_EXPORT Tree parse_in_arena(Parser *parser, csubstr filename, csubstr yaml ); ///< (7) create a new tree, and parse YAML into its root node. +RYML_EXPORT Tree parse_in_arena(Parser *parser, csubstr yaml ); ///< (8) like (7) but no filename will be reported + +// this is vertically aligned to highlight the parameter differences. +RYML_EXPORT void parse_json_in_arena(Parser *parser, csubstr filename, csubstr json, Tree *t, id_type node_id); ///< (1) parse JSON into an existing tree node. The filename will be used in any error messages arising during the parse. +RYML_EXPORT void parse_json_in_arena(Parser *parser, csubstr json, Tree *t, id_type node_id); ///< (2) like (1) but no filename will be reported +RYML_EXPORT void parse_json_in_arena(Parser *parser, csubstr filename, csubstr json, Tree *t ); ///< (3) parse JSON into an existing tree, into its root node. +RYML_EXPORT void parse_json_in_arena(Parser *parser, csubstr json, Tree *t ); ///< (4) like (3) but no filename will be reported +RYML_EXPORT void parse_json_in_arena(Parser *parser, csubstr filename, csubstr json, NodeRef node ); ///< (5) like (1) but the node is given as a NodeRef +RYML_EXPORT void parse_json_in_arena(Parser *parser, csubstr json, NodeRef node ); ///< (6) like (5) but no filename will be reported +RYML_EXPORT Tree parse_json_in_arena(Parser *parser, csubstr filename, csubstr json ); ///< (7) create a new tree, and parse JSON into its root node. 
+RYML_EXPORT Tree parse_json_in_arena(Parser *parser, csubstr json ); ///< (8) like (7) but no filename will be reported + +/* READ THE DEPRECATION NOTE! + * + * All of the functions below are intentionally left undefined, to + * prevent them being used. + * + */ +/** @cond dev */ +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(Parser *parser, substr yaml, Tree *t, id_type node_id); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(Parser *parser, csubstr filename, substr yaml, Tree *t, id_type node_id); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(Parser *parser, substr yaml, Tree *t ); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(Parser *parser, csubstr filename, substr yaml, Tree *t ); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(Parser *parser, substr yaml, NodeRef node ); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(Parser *parser, csubstr filename, substr yaml, NodeRef node ); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) Tree parse_in_arena(Parser *parser, substr yaml ); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) Tree parse_in_arena(Parser *parser, csubstr filename, substr yaml ); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_json_in_arena(Parser *parser, substr json, Tree *t, id_type node_id); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_json_in_arena(Parser *parser, csubstr filename, substr json, Tree *t, id_type node_id); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_json_in_arena(Parser *parser, substr json, Tree *t ); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_json_in_arena(Parser *parser, csubstr filename, substr json, Tree *t ); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_json_in_arena(Parser *parser, substr json, NodeRef node ); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_json_in_arena(Parser *parser, 
csubstr filename, substr json, NodeRef node ); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) Tree parse_json_in_arena(Parser *parser, substr json ); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) Tree parse_json_in_arena(Parser *parser, csubstr filename, substr json ); +/** @endcond */ + +/** @} */ + + +//----------------------------------------------------------------------------- + + +/** @defgroup doc_parse_in_arena__with_temporary_parser Parse in arena with temporary parser + * + * @brief parse a read-only (immutable) YAML source buffer. This is + * achieved by first copying the contents of the buffer to the tree's + * arena, and then calling @ref parse_in_arena() . + * + * @note These freestanding functions use a temporary parser object, + * and are convenience functions to easily one-off parse YAML without + * the need to instantiate a separate parser. Note that some + * properties (notably node locations in the original source code) are + * only available through the parser class. If you need access to any + * of these properties, use the appropriate overload from @ref + * doc_parse_in_arena__with_existing_parser + * + * @warning overloads receiving a substr YAML buffer are intentionally + * left undefined, such that calling parse_in_arena() with a substr + * will cause a linker error. This is to prevent an accidental copy of + * the source buffer to the tree's arena, because substr (which is + * mutable) is implicitly convertible to csubstr (which is + * immutable). If you really intend to parse a mutable buffer in the + * tree's arena, convert it first to immutable by assigning the substr + * to a csubstr prior to calling parse_in_arena(). This is not needed + * for parse_in_place() because csubstr is not implicitly convertible + * to substr. 
To be clear: + * ```c++ + * substr mutable_buffer = ...; + * parser.parse_in_arena(mutable_buffer); // linker error + * + * csubstr immutable_buffer = ...; + * parser.parse_in_arena(immutable_buffer); // ok + * ``` + * + * @{ + */ + +// this is vertically aligned to highlight the parameter differences. +RYML_EXPORT void parse_in_arena(csubstr filename, csubstr yaml, Tree *t, id_type node_id); ///< (1) parse YAML into an existing tree node. The filename will be used in any error messages arising during the parse. +RYML_EXPORT void parse_in_arena( csubstr yaml, Tree *t, id_type node_id); ///< (2) like (1) but no filename will be reported +RYML_EXPORT void parse_in_arena(csubstr filename, csubstr yaml, Tree *t ); ///< (3) parse YAML into an existing tree, into its root node. +RYML_EXPORT void parse_in_arena( csubstr yaml, Tree *t ); ///< (4) like (3) but no filename will be reported +RYML_EXPORT void parse_in_arena(csubstr filename, csubstr yaml, NodeRef node ); ///< (5) like (1) but the node is given as a NodeRef +RYML_EXPORT void parse_in_arena( csubstr yaml, NodeRef node ); ///< (6) like (5) but no filename will be reported +RYML_EXPORT Tree parse_in_arena(csubstr filename, csubstr yaml ); ///< (7) create a new tree, and parse YAML into its root node. +RYML_EXPORT Tree parse_in_arena( csubstr yaml ); ///< (8) like (7) but no filename will be reported + +// this is vertically aligned to highlight the parameter differences. +RYML_EXPORT void parse_json_in_arena(csubstr filename, csubstr json, Tree *t, id_type node_id); ///< (1) parse JSON into an existing tree node. The filename will be used in any error messages arising during the parse. +RYML_EXPORT void parse_json_in_arena( csubstr json, Tree *t, id_type node_id); ///< (2) like (1) but no filename will be reported +RYML_EXPORT void parse_json_in_arena(csubstr filename, csubstr json, Tree *t ); ///< (3) parse JSON into an existing tree, into its root node. 
+RYML_EXPORT void parse_json_in_arena( csubstr json, Tree *t ); ///< (4) like (3) but no filename will be reported +RYML_EXPORT void parse_json_in_arena(csubstr filename, csubstr json, NodeRef node ); ///< (5) like (1) but the node is given as a NodeRef +RYML_EXPORT void parse_json_in_arena( csubstr json, NodeRef node ); ///< (6) like (5) but no filename will be reported +RYML_EXPORT Tree parse_json_in_arena(csubstr filename, csubstr json ); ///< (7) create a new tree, and parse JSON into its root node. +RYML_EXPORT Tree parse_json_in_arena( csubstr json ); ///< (8) like (7) but no filename will be reported + +/* READ THE DEPRECATION NOTE! + * + * All of the functions below are intentionally left undefined, to + * prevent them being used. + */ +/** @cond dev */ +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena( substr yaml, Tree *t, id_type node_id); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(csubstr filename, substr yaml, Tree *t, id_type node_id); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena( substr yaml, Tree *t ); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(csubstr filename, substr yaml, Tree *t ); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena( substr yaml, NodeRef node ); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(csubstr filename, substr yaml, NodeRef node ); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) Tree parse_in_arena( substr yaml ); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) Tree parse_in_arena(csubstr filename, substr yaml ); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_json_in_arena( substr json, Tree *t, id_type node_id); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_json_in_arena(csubstr filename, substr json, Tree *t, id_type node_id); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_json_in_arena( substr json, Tree *t ); 
+RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_json_in_arena(csubstr filename, substr json, Tree *t ); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_json_in_arena( substr json, NodeRef node ); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_json_in_arena(csubstr filename, substr json, NodeRef node ); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) Tree parse_json_in_arena( substr json ); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) Tree parse_json_in_arena(csubstr filename, substr json ); +/** @endcond */ + +/** @} */ +/** @} */ + +} // namespace yml +} // namespace c4 + +#endif /* _C4_YML_PARSE_HPP_ */ + + +// (end https://github.com/biojppm/rapidyaml/src/c4/yml/parse.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/yml/std/map.hpp +// https://github.com/biojppm/rapidyaml/src/c4/yml/std/map.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4_YML_STD_MAP_HPP_ +#define _C4_YML_STD_MAP_HPP_ + +/** @file map.hpp write/read std::map to/from a YAML tree. */ + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/node.hpp +//#include "c4/yml/node.hpp" +#if !defined(C4_YML_NODE_HPP_) && !defined(_C4_YML_NODE_HPP_) +#error "amalgamate: file c4/yml/node.hpp must have been included at this point" +#endif /* C4_YML_NODE_HPP_ */ + +#include + +namespace c4 { +namespace yml { + +// std::map requires child nodes in the data +// tree hierarchy (a MAP node in ryml parlance). +// So it should be serialized via write()/read(). 
+ +template +void write(c4::yml::NodeRef *n, std::map const& m) +{ + *n |= c4::yml::MAP; + for(auto const& C4_RESTRICT p : m) + { + auto ch = n->append_child(); + ch << c4::yml::key(p.first); + ch << p.second; + } +} + +template +bool read(c4::yml::ConstNodeRef const& n, std::map * m) +{ + K k{}; + V v{}; + for(auto const& C4_RESTRICT ch : n) + { + ch >> c4::yml::key(k); + ch >> v; + m->emplace(std::make_pair(std::move(k), std::move(v))); + } + return true; +} + +} // namespace yml +} // namespace c4 + +#endif // _C4_YML_STD_MAP_HPP_ + + +// (end https://github.com/biojppm/rapidyaml/src/c4/yml/std/map.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/yml/std/string.hpp +// https://github.com/biojppm/rapidyaml/src/c4/yml/std/string.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef C4_YML_STD_STRING_HPP_ +#define C4_YML_STD_STRING_HPP_ + +/** @file string.hpp substring conversions for/from std::string */ + +// everything we need is implemented here: +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/std/string.hpp +//#include +#if !defined(C4_STD_STRING_HPP_) && !defined(_C4_STD_STRING_HPP_) +#error "amalgamate: file c4/std/string.hpp must have been included at this point" +#endif /* C4_STD_STRING_HPP_ */ + + +#endif // C4_YML_STD_STRING_HPP_ + + +// (end https://github.com/biojppm/rapidyaml/src/c4/yml/std/string.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/yml/std/vector.hpp +// https://github.com/biojppm/rapidyaml/src/c4/yml/std/vector.hpp +//-------------------------------------------------------------------------------- 
+//******************************************************************************** + +#ifndef _C4_YML_STD_VECTOR_HPP_ +#define _C4_YML_STD_VECTOR_HPP_ + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/node.hpp +//#include "c4/yml/node.hpp" +#if !defined(C4_YML_NODE_HPP_) && !defined(_C4_YML_NODE_HPP_) +#error "amalgamate: file c4/yml/node.hpp must have been included at this point" +#endif /* C4_YML_NODE_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/std/vector.hpp +//#include +#if !defined(C4_STD_VECTOR_HPP_) && !defined(_C4_STD_VECTOR_HPP_) +#error "amalgamate: file c4/std/vector.hpp must have been included at this point" +#endif /* C4_STD_VECTOR_HPP_ */ + +//included above: +//#include + +namespace c4 { +namespace yml { + +// vector is a sequence-like type, and it requires child nodes +// in the data tree hierarchy (a SEQ node in ryml parlance). +// So it should be serialized via write()/read(). + + +template +void write(c4::yml::NodeRef *n, std::vector const& vec) +{ + *n |= c4::yml::SEQ; + for(V const& v : vec) + n->append_child() << v; +} + +template +bool read(c4::yml::ConstNodeRef const& n, std::vector *vec) +{ + C4_SUPPRESS_WARNING_GCC_WITH_PUSH("-Wuseless-cast") + vec->resize(static_cast(n.num_children())); + C4_SUPPRESS_WARNING_GCC_POP + size_t pos = 0; + for(ConstNodeRef const child : n) + child >> (*vec)[pos++]; + return true; +} + +/** specialization: std::vector uses std::vector::reference as + * the return value of its operator[]. 
*/ +template +bool read(c4::yml::ConstNodeRef const& n, std::vector *vec) +{ + C4_SUPPRESS_WARNING_GCC_WITH_PUSH("-Wuseless-cast") + vec->resize(static_cast(n.num_children())); + C4_SUPPRESS_WARNING_GCC_POP + size_t pos = 0; + bool tmp = {}; + for(ConstNodeRef const child : n) + { + child >> tmp; + (*vec)[pos++] = tmp; + } + return true; +} + +} // namespace yml +} // namespace c4 + +#endif // _C4_YML_STD_VECTOR_HPP_ + + +// (end https://github.com/biojppm/rapidyaml/src/c4/yml/std/vector.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/yml/std/std.hpp +// https://github.com/biojppm/rapidyaml/src/c4/yml/std/std.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4_YML_STD_STD_HPP_ +#define _C4_YML_STD_STD_HPP_ + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/std/string.hpp +//#include "c4/yml/std/string.hpp" +#if !defined(C4_YML_STD_STRING_HPP_) && !defined(_C4_YML_STD_STRING_HPP_) +#error "amalgamate: file c4/yml/std/string.hpp must have been included at this point" +#endif /* C4_YML_STD_STRING_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/std/vector.hpp +//#include "c4/yml/std/vector.hpp" +#if !defined(C4_YML_STD_VECTOR_HPP_) && !defined(_C4_YML_STD_VECTOR_HPP_) +#error "amalgamate: file c4/yml/std/vector.hpp must have been included at this point" +#endif /* C4_YML_STD_VECTOR_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/std/map.hpp +//#include "c4/yml/std/map.hpp" +#if !defined(C4_YML_STD_MAP_HPP_) && !defined(_C4_YML_STD_MAP_HPP_) +#error "amalgamate: file c4/yml/std/map.hpp must have been included at this point" +#endif /* C4_YML_STD_MAP_HPP_ */ + + 
+#endif // _C4_YML_STD_STD_HPP_ + + +// (end https://github.com/biojppm/rapidyaml/src/c4/yml/std/std.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/yml/version.cpp +// https://github.com/biojppm/rapidyaml/src/c4/yml/version.cpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifdef RYML_SINGLE_HDR_DEFINE_NOW +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/version.hpp +//#include "c4/yml/version.hpp" +#if !defined(C4_YML_VERSION_HPP_) && !defined(_C4_YML_VERSION_HPP_) +#error "amalgamate: file c4/yml/version.hpp must have been included at this point" +#endif /* C4_YML_VERSION_HPP_ */ + + +namespace c4 { +namespace yml { + +csubstr version() +{ + return RYML_VERSION; +} + +int version_major() +{ + return RYML_VERSION_MAJOR; +} + +int version_minor() +{ + return RYML_VERSION_MINOR; +} + +int version_patch() +{ + return RYML_VERSION_PATCH; +} + +} // namespace yml +} // namespace c4 + +#endif /* RYML_SINGLE_HDR_DEFINE_NOW */ + + +// (end https://github.com/biojppm/rapidyaml/src/c4/yml/version.cpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/yml/common.cpp +// https://github.com/biojppm/rapidyaml/src/c4/yml/common.cpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifdef RYML_SINGLE_HDR_DEFINE_NOW +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/common.hpp +//#include "c4/yml/common.hpp" +#if !defined(C4_YML_COMMON_HPP_) && !defined(_C4_YML_COMMON_HPP_) +#error "amalgamate: 
file c4/yml/common.hpp must have been included at this point" +#endif /* C4_YML_COMMON_HPP_ */ + + +#ifndef RYML_NO_DEFAULT_CALLBACKS +//included above: +//# include +//included above: +//# include +# ifdef RYML_DEFAULT_CALLBACK_USES_EXCEPTIONS +# include +# endif +#endif // RYML_NO_DEFAULT_CALLBACKS + + +namespace c4 { +namespace yml { + +C4_SUPPRESS_WARNING_GCC_CLANG_WITH_PUSH("-Wold-style-cast") +C4_SUPPRESS_WARNING_MSVC_WITH_PUSH(4702/*unreachable code*/) // on the call to the unreachable macro + +namespace { +Callbacks s_default_callbacks; +} // anon namespace + +#ifndef RYML_NO_DEFAULT_CALLBACKS +void report_error_impl(const char* msg, size_t length, Location loc, FILE *f) +{ + if(!f) + f = stderr; + if(loc) + { + if(!loc.name.empty()) + { + // this is more portable than using fprintf("%.*s:") which + // is not available in some embedded platforms + fwrite(loc.name.str, 1, loc.name.len, f); + fputc(':', f); + } + fprintf(f, "%zu:", loc.line); + if(loc.col) + fprintf(f, "%zu:", loc.col); + if(loc.offset) + fprintf(f, " (%zuB):", loc.offset); + fputc(' ', f); + } + RYML_ASSERT(!csubstr(msg, length).ends_with('\0')); + fwrite(msg, 1, length, f); + fputc('\n', f); + fflush(f); +} + +[[noreturn]] void error_impl(const char* msg, size_t length, Location loc, void * /*user_data*/) +{ + RYML_ASSERT(!csubstr(msg, length).ends_with('\0')); + report_error_impl(msg, length, loc, nullptr); +#ifdef RYML_DEFAULT_CALLBACK_USES_EXCEPTIONS + throw std::runtime_error(std::string(msg, length)); +#else + ::abort(); +#endif +} + +void* allocate_impl(size_t length, void * /*hint*/, void * /*user_data*/) +{ + void *mem = ::malloc(length); + if(mem == nullptr) + { + const char msg[] = "could not allocate memory"; + error_impl(msg, sizeof(msg)-1, {}, nullptr); + } + return mem; +} + +void free_impl(void *mem, size_t /*length*/, void * /*user_data*/) +{ + ::free(mem); +} +#endif // RYML_NO_DEFAULT_CALLBACKS + + + +Callbacks::Callbacks() + : + m_user_data(nullptr), + #ifndef 
RYML_NO_DEFAULT_CALLBACKS + m_allocate(allocate_impl), + m_free(free_impl), + m_error(error_impl) + #else + m_allocate(nullptr), + m_free(nullptr), + m_error(nullptr) + #endif +{ +} + +Callbacks::Callbacks(void *user_data, pfn_allocate alloc_, pfn_free free_, pfn_error error_) + : + m_user_data(user_data), + #ifndef RYML_NO_DEFAULT_CALLBACKS + m_allocate(alloc_ ? alloc_ : allocate_impl), + m_free(free_ ? free_ : free_impl), + m_error((error_ ? error_ : error_impl)) + #else + m_allocate(alloc_), + m_free(free_), + m_error(error_) + #endif +{ + RYML_CHECK(m_allocate); + RYML_CHECK(m_free); + RYML_CHECK(m_error); +} + + +void set_callbacks(Callbacks const& c) +{ + s_default_callbacks = c; +} + +Callbacks const& get_callbacks() +{ + return s_default_callbacks; +} + +void reset_callbacks() +{ + set_callbacks(Callbacks()); +} + +// the [[noreturn]] attribute needs to be here as well (UB otherwise) +// https://en.cppreference.com/w/cpp/language/attributes/noreturn +[[noreturn]] void error(Callbacks const& cb, const char *msg, size_t msg_len, Location loc) +{ + cb.m_error(msg, msg_len, loc, cb.m_user_data); + abort(); // call abort in case the error callback didn't interrupt execution + C4_UNREACHABLE(); +} + +// the [[noreturn]] attribute needs to be here as well (UB otherwise) +// see https://en.cppreference.com/w/cpp/language/attributes/noreturn +[[noreturn]] void error(const char *msg, size_t msg_len, Location loc) +{ + error(s_default_callbacks, msg, msg_len, loc); + C4_UNREACHABLE(); +} + +C4_SUPPRESS_WARNING_MSVC_POP +C4_SUPPRESS_WARNING_GCC_CLANG_POP + +} // namespace yml +} // namespace c4 + +#endif /* RYML_SINGLE_HDR_DEFINE_NOW */ + + +// (end https://github.com/biojppm/rapidyaml/src/c4/yml/common.cpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/yml/node_type.cpp +// 
https://github.com/biojppm/rapidyaml/src/c4/yml/node_type.cpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifdef RYML_SINGLE_HDR_DEFINE_NOW +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/node_type.hpp +//#include "c4/yml/node_type.hpp" +#if !defined(C4_YML_NODE_TYPE_HPP_) && !defined(_C4_YML_NODE_TYPE_HPP_) +#error "amalgamate: file c4/yml/node_type.hpp must have been included at this point" +#endif /* C4_YML_NODE_TYPE_HPP_ */ + + +namespace c4 { +namespace yml { + +const char* NodeType::type_str(NodeType_e ty) noexcept +{ + switch(ty & _TYMASK) + { + case KEYVAL: + return "KEYVAL"; + case KEY: + return "KEY"; + case VAL: + return "VAL"; + case MAP: + return "MAP"; + case SEQ: + return "SEQ"; + case KEYMAP: + return "KEYMAP"; + case KEYSEQ: + return "KEYSEQ"; + case DOCSEQ: + return "DOCSEQ"; + case DOCMAP: + return "DOCMAP"; + case DOCVAL: + return "DOCVAL"; + case DOC: + return "DOC"; + case STREAM: + return "STREAM"; + case NOTYPE: + return "NOTYPE"; + default: + if((ty & KEYVAL) == KEYVAL) + return "KEYVAL***"; + if((ty & KEYMAP) == KEYMAP) + return "KEYMAP***"; + if((ty & KEYSEQ) == KEYSEQ) + return "KEYSEQ***"; + if((ty & DOCSEQ) == DOCSEQ) + return "DOCSEQ***"; + if((ty & DOCMAP) == DOCMAP) + return "DOCMAP***"; + if((ty & DOCVAL) == DOCVAL) + return "DOCVAL***"; + if(ty & KEY) + return "KEY***"; + if(ty & VAL) + return "VAL***"; + if(ty & MAP) + return "MAP***"; + if(ty & SEQ) + return "SEQ***"; + if(ty & DOC) + return "DOC***"; + return "(unk)"; + } +} + +csubstr NodeType::type_str(substr buf, NodeType_e flags) noexcept +{ + size_t pos = 0; + bool gotone = false; + + #define _prflag(fl, txt) \ + do { \ + if((flags & fl) == (fl)) \ + { \ + if(gotone) \ + { \ + if(pos + 1 < buf.len) \ + buf[pos] = '|'; \ + ++pos; \ + } \ + csubstr fltxt = txt; \ + if(pos + fltxt.len <= buf.len) \ + 
memcpy(buf.str + pos, fltxt.str, fltxt.len); \ + pos += fltxt.len; \ + gotone = true; \ + flags = (flags & ~fl); /*remove the flag*/ \ + } \ + } while(0) + + _prflag(STREAM, "STREAM"); + _prflag(DOC, "DOC"); + // key properties + _prflag(KEY, "KEY"); + _prflag(KEYTAG, "KTAG"); + _prflag(KEYANCH, "KANCH"); + _prflag(KEYREF, "KREF"); + _prflag(KEY_LITERAL, "KLITERAL"); + _prflag(KEY_FOLDED, "KFOLDED"); + _prflag(KEY_SQUO, "KSQUO"); + _prflag(KEY_DQUO, "KDQUO"); + _prflag(KEY_PLAIN, "KPLAIN"); + _prflag(KEY_UNFILT, "KUNFILT"); + // val properties + _prflag(VAL, "VAL"); + _prflag(VALTAG, "VTAG"); + _prflag(VALANCH, "VANCH"); + _prflag(VALREF, "VREF"); + _prflag(VAL_UNFILT, "VUNFILT"); + _prflag(VAL_LITERAL, "VLITERAL"); + _prflag(VAL_FOLDED, "VFOLDED"); + _prflag(VAL_SQUO, "VSQUO"); + _prflag(VAL_DQUO, "VDQUO"); + _prflag(VAL_PLAIN, "VPLAIN"); + _prflag(VAL_UNFILT, "VUNFILT"); + // container properties + _prflag(MAP, "MAP"); + _prflag(SEQ, "SEQ"); + _prflag(FLOW_SL, "FLOWSL"); + _prflag(FLOW_ML, "FLOWML"); + _prflag(BLOCK, "BLCK"); + if(pos == 0) + _prflag(NOTYPE, "NOTYPE"); + + #undef _prflag + + if(pos < buf.len) + { + buf[pos] = '\0'; + return buf.first(pos); + } + else + { + csubstr failed; + failed.len = pos + 1; + failed.str = nullptr; + return failed; + } +} + + +//----------------------------------------------------------------------------- + +// see https://www.yaml.info/learn/quote.html#noplain +bool scalar_style_query_squo(csubstr s) noexcept +{ + return ! s.first_of_any("\n ", "\n\t"); +} + +// see https://www.yaml.info/learn/quote.html#noplain +bool scalar_style_query_plain(csubstr s) noexcept +{ + if(s.begins_with("-.")) + { + if(s == "-.inf" || s == "-.INF") + return true; + else if(s.sub(2).is_number()) + return true; + } + else if(s.begins_with_any("0123456789.-+") && s.is_number()) + { + return true; + } + return s != ':' + && ( ! s.begins_with_any("-:?*&,'\"{}[]|>%#@`\r")) // @ and ` are reserved characters + && ( ! 
s.ends_with_any(":#")) + // make this check in the last place, as it has linear + // complexity, while the previous ones are + // constant-time + && (s.first_of("\n#:[]{},") == npos); +} + +NodeType_e scalar_style_choose(csubstr s) noexcept +{ + if(s.len) + { + if(s.begins_with_any(" \n\t") + || + s.ends_with_any(" \n\t")) + { + return SCALAR_DQUO; + } + else if( ! scalar_style_query_plain(s)) + { + return scalar_style_query_squo(s) ? SCALAR_SQUO : SCALAR_DQUO; + } + // nothing remarkable - use plain + return SCALAR_PLAIN; + } + return s.str ? SCALAR_SQUO : SCALAR_PLAIN; +} + +NodeType_e scalar_style_json_choose(csubstr s) noexcept +{ + // do not quote special cases + bool plain = ( + (s == "true" || s == "false" || s == "null") + || + ( + // do not quote numbers + s.is_number() + && + ( + // quote integral numbers if they have a leading 0 + // https://github.com/biojppm/rapidyaml/issues/291 + (!(s.len > 1 && s.begins_with('0'))) + // do not quote reals with leading 0 + // https://github.com/biojppm/rapidyaml/issues/313 + || (s.find('.') != csubstr::npos) + ) + ) + ); + return plain ? 
SCALAR_PLAIN : SCALAR_DQUO; +} + +} // namespace yml +} // namespace c4 + +#endif /* RYML_SINGLE_HDR_DEFINE_NOW */ + + +// (end https://github.com/biojppm/rapidyaml/src/c4/yml/node_type.cpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/yml/tag.cpp +// https://github.com/biojppm/rapidyaml/src/c4/yml/tag.cpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifdef RYML_SINGLE_HDR_DEFINE_NOW +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/tag.hpp +//#include "c4/yml/tag.hpp" +#if !defined(C4_YML_TAG_HPP_) && !defined(_C4_YML_TAG_HPP_) +#error "amalgamate: file c4/yml/tag.hpp must have been included at this point" +#endif /* C4_YML_TAG_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/tree.hpp +//#include "c4/yml/tree.hpp" +#if !defined(C4_YML_TREE_HPP_) && !defined(_C4_YML_TREE_HPP_) +#error "amalgamate: file c4/yml/tree.hpp must have been included at this point" +#endif /* C4_YML_TREE_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/detail/parser_dbg.hpp +//#include "c4/yml/detail/parser_dbg.hpp" +#if !defined(C4_YML_DETAIL_PARSER_DBG_HPP_) && !defined(_C4_YML_DETAIL_PARSER_DBG_HPP_) +#error "amalgamate: file c4/yml/detail/parser_dbg.hpp must have been included at this point" +#endif /* C4_YML_DETAIL_PARSER_DBG_HPP_ */ + + + +namespace c4 { +namespace yml { + +bool is_custom_tag(csubstr tag) +{ + if((tag.len > 2) && (tag.str[0] == '!')) + { + size_t pos = tag.find('!', 1); + return pos != npos && pos > 1 && tag.str[1] != '<'; + } + return false; +} + +csubstr normalize_tag(csubstr tag) +{ + YamlTag_e t = to_tag(tag); + if(t != TAG_NONE) + return from_tag(t); + 
if(tag.begins_with("!<")) + tag = tag.sub(1); + if(tag.begins_with("'; + result = output.first(len); + } + else + { + result.str = nullptr; + result.len = len; + } + } + return result; +} + +YamlTag_e to_tag(csubstr tag) +{ + if(tag.begins_with("!<")) + tag = tag.sub(1); + if(tag.begins_with("!!")) + tag = tag.sub(2); + else if(tag.begins_with('!')) + return TAG_NONE; + else if(tag.begins_with("tag:yaml.org,2002:")) + { + RYML_ASSERT(csubstr("tag:yaml.org,2002:").len == 18); + tag = tag.sub(18); + } + else if(tag.begins_with(""}; + case TAG_OMAP: + return {""}; + case TAG_PAIRS: + return {""}; + case TAG_SET: + return {""}; + case TAG_SEQ: + return {""}; + case TAG_BINARY: + return {""}; + case TAG_BOOL: + return {""}; + case TAG_FLOAT: + return {""}; + case TAG_INT: + return {""}; + case TAG_MERGE: + return {""}; + case TAG_NULL: + return {""}; + case TAG_STR: + return {""}; + case TAG_TIMESTAMP: + return {""}; + case TAG_VALUE: + return {""}; + case TAG_YAML: + return {""}; + case TAG_NONE: + default: + return {""}; + } +} + +csubstr from_tag(YamlTag_e tag) +{ + switch(tag) + { + case TAG_MAP: + return {"!!map"}; + case TAG_OMAP: + return {"!!omap"}; + case TAG_PAIRS: + return {"!!pairs"}; + case TAG_SET: + return {"!!set"}; + case TAG_SEQ: + return {"!!seq"}; + case TAG_BINARY: + return {"!!binary"}; + case TAG_BOOL: + return {"!!bool"}; + case TAG_FLOAT: + return {"!!float"}; + case TAG_INT: + return {"!!int"}; + case TAG_MERGE: + return {"!!merge"}; + case TAG_NULL: + return {"!!null"}; + case TAG_STR: + return {"!!str"}; + case TAG_TIMESTAMP: + return {"!!timestamp"}; + case TAG_VALUE: + return {"!!value"}; + case TAG_YAML: + return {"!!yaml"}; + case TAG_NONE: + default: + return {""}; + } +} + + +bool TagDirective::create_from_str(csubstr directive_) +{ + csubstr directive = directive_; + directive = directive.sub(4); + if(!directive.begins_with(' ')) + return false; + directive = directive.triml(' '); + size_t pos = directive.find(' '); + if(pos == npos) + 
return false; + handle = directive.first(pos); + directive = directive.sub(handle.len).triml(' '); + pos = directive.find(' '); + if(pos != npos) + directive = directive.first(pos); + prefix = directive; + next_node_id = NONE; + _c4dbgpf("%TAG: handle={} prefix={}", handle, prefix); + return true; +} + +bool TagDirective::create_from_str(csubstr directive_, Tree *tree) +{ + _RYML_CB_CHECK(tree->callbacks(), directive_.begins_with("%TAG ")); + if(!create_from_str(directive_)) + { + _RYML_CB_ERR(tree->callbacks(), "invalid tag directive"); + } + next_node_id = tree->size(); + if(tree->size() > 0) + { + const id_type prev = tree->size() - 1; + if(tree->is_root(prev) && tree->type(prev) != NOTYPE && !tree->is_stream(prev)) + ++next_node_id; + } + _c4dbgpf("%TAG: handle={} prefix={} next_node={}", handle, prefix, next_node_id); + return true; +} + +size_t TagDirective::transform(csubstr tag, substr output, Callbacks const& callbacks) const +{ + _c4dbgpf("%TAG: handle={} prefix={} next_node={}. tag={}", handle, prefix, next_node_id, tag); + _RYML_CB_ASSERT(callbacks, tag.len >= handle.len); + csubstr rest = tag.sub(handle.len); + _c4dbgpf("%TAG: rest={}", rest); + if(rest.begins_with('<')) + { + _c4dbgpf("%TAG: begins with <. rest={}", rest); + if(C4_UNLIKELY(!rest.ends_with('>'))) + _RYML_CB_ERR(callbacks, "malformed tag"); + rest = rest.offs(1, 1); + if(rest.begins_with(prefix)) + { + _c4dbgpf("%TAG: already transformed! 
actual={}", rest.sub(prefix.len)); + return 0; // return 0 to signal that the tag is local and cannot be resolved + } + } + size_t len = 1u + prefix.len + rest.len + 1u; + size_t numpc = rest.count('%'); + if(numpc == 0) + { + if(len <= output.len) + { + output.str[0] = '<'; + memcpy(1u + output.str, prefix.str, prefix.len); + memcpy(1u + output.str + prefix.len, rest.str, rest.len); + output.str[1u + prefix.len + rest.len] = '>'; + } + } + else + { + // need to decode URI % sequences + size_t pos = rest.find('%'); + _RYML_CB_ASSERT(callbacks, pos != npos); + do { + size_t next = rest.first_not_of("0123456789abcdefABCDEF", pos+1); + if(next == npos) + next = rest.len; + _RYML_CB_CHECK(callbacks, pos+1 < next); + _RYML_CB_CHECK(callbacks, pos+1 + 2 <= next); + size_t delta = next - (pos+1); + len -= delta; + pos = rest.find('%', pos+1); + } while(pos != npos); + if(len <= output.len) + { + size_t prev = 0, wpos = 0; + auto appendstr = [&](csubstr s) { memcpy(output.str + wpos, s.str, s.len); wpos += s.len; }; + auto appendchar = [&](char c) { output.str[wpos++] = c; }; + appendchar('<'); + appendstr(prefix); + pos = rest.find('%'); + _RYML_CB_ASSERT(callbacks, pos != npos); + do { + size_t next = rest.first_not_of("0123456789abcdefABCDEF", pos+1); + if(next == npos) + next = rest.len; + _RYML_CB_CHECK(callbacks, pos+1 < next); + _RYML_CB_CHECK(callbacks, pos+1 + 2 <= next); + uint8_t val; + if(C4_UNLIKELY(!read_hex(rest.range(pos+1, next), &val) || val > 127)) + _RYML_CB_ERR(callbacks, "invalid URI character"); + appendstr(rest.range(prev, pos)); + appendchar(static_cast(val)); + prev = next; + pos = rest.find('%', pos+1); + } while(pos != npos); + _RYML_CB_ASSERT(callbacks, pos == npos); + _RYML_CB_ASSERT(callbacks, prev > 0); + _RYML_CB_ASSERT(callbacks, rest.len >= prev); + appendstr(rest.sub(prev)); + appendchar('>'); + _RYML_CB_ASSERT(callbacks, wpos == len); + } + } + return len; +} + +} // namespace yml +} // namespace c4 + +#endif /* 
RYML_SINGLE_HDR_DEFINE_NOW */ + + +// (end https://github.com/biojppm/rapidyaml/src/c4/yml/tag.cpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/yml/tree.cpp +// https://github.com/biojppm/rapidyaml/src/c4/yml/tree.cpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifdef RYML_SINGLE_HDR_DEFINE_NOW +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/tree.hpp +//#include "c4/yml/tree.hpp" +#if !defined(C4_YML_TREE_HPP_) && !defined(_C4_YML_TREE_HPP_) +#error "amalgamate: file c4/yml/tree.hpp must have been included at this point" +#endif /* C4_YML_TREE_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/detail/parser_dbg.hpp +//#include "c4/yml/detail/parser_dbg.hpp" +#if !defined(C4_YML_DETAIL_PARSER_DBG_HPP_) && !defined(_C4_YML_DETAIL_PARSER_DBG_HPP_) +#error "amalgamate: file c4/yml/detail/parser_dbg.hpp must have been included at this point" +#endif /* C4_YML_DETAIL_PARSER_DBG_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/node.hpp +//#include "c4/yml/node.hpp" +#if !defined(C4_YML_NODE_HPP_) && !defined(_C4_YML_NODE_HPP_) +#error "amalgamate: file c4/yml/node.hpp must have been included at this point" +#endif /* C4_YML_NODE_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/reference_resolver.hpp +//#include "c4/yml/reference_resolver.hpp" +#if !defined(C4_YML_REFERENCE_RESOLVER_HPP_) && !defined(_C4_YML_REFERENCE_RESOLVER_HPP_) +#error "amalgamate: file c4/yml/reference_resolver.hpp must have been included at this point" +#endif /* C4_YML_REFERENCE_RESOLVER_HPP_ */ + + + +C4_SUPPRESS_WARNING_MSVC_WITH_PUSH(4296/*expression is 
always 'boolean_value'*/) +C4_SUPPRESS_WARNING_MSVC(4702/*unreachable code*/) +C4_SUPPRESS_WARNING_GCC_CLANG_WITH_PUSH("-Wold-style-cast") +C4_SUPPRESS_WARNING_GCC("-Wtype-limits") +C4_SUPPRESS_WARNING_GCC("-Wuseless-cast") + +namespace c4 { +namespace yml { + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +NodeRef Tree::rootref() +{ + return NodeRef(this, root_id()); +} +ConstNodeRef Tree::rootref() const +{ + return ConstNodeRef(this, root_id()); +} + +ConstNodeRef Tree::crootref() const +{ + return ConstNodeRef(this, root_id()); +} + +NodeRef Tree::ref(id_type id) +{ + _RYML_CB_ASSERT(m_callbacks, id != NONE && id >= 0 && id < m_cap); + return NodeRef(this, id); +} +ConstNodeRef Tree::ref(id_type id) const +{ + _RYML_CB_ASSERT(m_callbacks, id != NONE && id >= 0 && id < m_cap); + return ConstNodeRef(this, id); +} +ConstNodeRef Tree::cref(id_type id) const +{ + _RYML_CB_ASSERT(m_callbacks, id != NONE && id >= 0 && id < m_cap); + return ConstNodeRef(this, id); +} + +NodeRef Tree::operator[] (csubstr key) +{ + return rootref()[key]; +} +ConstNodeRef Tree::operator[] (csubstr key) const +{ + return rootref()[key]; +} + +NodeRef Tree::operator[] (id_type i) +{ + return rootref()[i]; +} +ConstNodeRef Tree::operator[] (id_type i) const +{ + return rootref()[i]; +} + +NodeRef Tree::docref(id_type i) +{ + return ref(doc(i)); +} +ConstNodeRef Tree::docref(id_type i) const +{ + return cref(doc(i)); +} +ConstNodeRef Tree::cdocref(id_type i) const +{ + return cref(doc(i)); +} + + +//----------------------------------------------------------------------------- +Tree::Tree(Callbacks const& cb) + : m_buf(nullptr) + , m_cap(0) + , m_size(0) + , m_free_head(NONE) + , m_free_tail(NONE) + , m_arena() + , m_arena_pos(0) + , m_callbacks(cb) + , m_tag_directives() +{ +} + 
+Tree::Tree(id_type node_capacity, size_t arena_capacity, Callbacks const& cb) + : Tree(cb) +{ + reserve(node_capacity); + reserve_arena(arena_capacity); +} + +Tree::~Tree() +{ + _free(); +} + + +Tree::Tree(Tree const& that) : Tree(that.m_callbacks) +{ + _copy(that); +} + +Tree& Tree::operator= (Tree const& that) +{ + _free(); + m_callbacks = that.m_callbacks; + _copy(that); + return *this; +} + +Tree::Tree(Tree && that) noexcept : Tree(that.m_callbacks) +{ + _move(that); +} + +Tree& Tree::operator= (Tree && that) RYML_NOEXCEPT +{ + _free(); + m_callbacks = that.m_callbacks; + _move(that); + return *this; +} + +void Tree::_free() +{ + if(m_buf) + { + _RYML_CB_ASSERT(m_callbacks, m_cap > 0); + _RYML_CB_FREE(m_callbacks, m_buf, NodeData, (size_t)m_cap); + } + if(m_arena.str) + { + _RYML_CB_ASSERT(m_callbacks, m_arena.len > 0); + _RYML_CB_FREE(m_callbacks, m_arena.str, char, m_arena.len); + } + _clear(); +} + + +C4_SUPPRESS_WARNING_GCC_PUSH +#if defined(__GNUC__) && __GNUC__>= 8 + C4_SUPPRESS_WARNING_GCC_WITH_PUSH("-Wclass-memaccess") // error: ‘void* memset(void*, int, size_t)’ clearing an object of type ‘class c4::yml::Tree’ with no trivial copy-assignment; use assignment or value-initialization instead +#endif + +void Tree::_clear() +{ + m_buf = nullptr; + m_cap = 0; + m_size = 0; + m_free_head = 0; + m_free_tail = 0; + m_arena = {}; + m_arena_pos = 0; + for(id_type i = 0; i < RYML_MAX_TAG_DIRECTIVES; ++i) + m_tag_directives[i] = {}; +} + +void Tree::_copy(Tree const& that) +{ + _RYML_CB_ASSERT(m_callbacks, m_buf == nullptr); + _RYML_CB_ASSERT(m_callbacks, m_arena.str == nullptr); + _RYML_CB_ASSERT(m_callbacks, m_arena.len == 0); + if(that.m_cap) + { + m_buf = _RYML_CB_ALLOC_HINT(m_callbacks, NodeData, (size_t)that.m_cap, that.m_buf); + memcpy(m_buf, that.m_buf, (size_t)that.m_cap * sizeof(NodeData)); + } + m_cap = that.m_cap; + m_size = that.m_size; + m_free_head = that.m_free_head; + m_free_tail = that.m_free_tail; + m_arena_pos = that.m_arena_pos; + m_arena = 
that.m_arena; + if(that.m_arena.str) + { + _RYML_CB_ASSERT(m_callbacks, that.m_arena.len > 0); + substr arena; + arena.str = _RYML_CB_ALLOC_HINT(m_callbacks, char, that.m_arena.len, that.m_arena.str); + arena.len = that.m_arena.len; + _relocate(arena); // does a memcpy of the arena and updates nodes using the old arena + m_arena = arena; + } + for(id_type i = 0; i < RYML_MAX_TAG_DIRECTIVES; ++i) + m_tag_directives[i] = that.m_tag_directives[i]; +} + +void Tree::_move(Tree & that) noexcept +{ + _RYML_CB_ASSERT(m_callbacks, m_buf == nullptr); + _RYML_CB_ASSERT(m_callbacks, m_arena.str == nullptr); + _RYML_CB_ASSERT(m_callbacks, m_arena.len == 0); + m_buf = that.m_buf; + m_cap = that.m_cap; + m_size = that.m_size; + m_free_head = that.m_free_head; + m_free_tail = that.m_free_tail; + m_arena = that.m_arena; + m_arena_pos = that.m_arena_pos; + for(id_type i = 0; i < RYML_MAX_TAG_DIRECTIVES; ++i) + m_tag_directives[i] = that.m_tag_directives[i]; + that._clear(); +} + +void Tree::_relocate(substr next_arena) +{ + _RYML_CB_ASSERT(m_callbacks, next_arena.not_empty()); + _RYML_CB_ASSERT(m_callbacks, next_arena.len >= m_arena.len); + if(m_arena_pos) + memcpy(next_arena.str, m_arena.str, m_arena_pos); + for(NodeData *C4_RESTRICT n = m_buf, *e = m_buf + m_cap; n != e; ++n) + { + if(in_arena(n->m_key.scalar)) + n->m_key.scalar = _relocated(n->m_key.scalar, next_arena); + if(in_arena(n->m_key.tag)) + n->m_key.tag = _relocated(n->m_key.tag, next_arena); + if(in_arena(n->m_key.anchor)) + n->m_key.anchor = _relocated(n->m_key.anchor, next_arena); + if(in_arena(n->m_val.scalar)) + n->m_val.scalar = _relocated(n->m_val.scalar, next_arena); + if(in_arena(n->m_val.tag)) + n->m_val.tag = _relocated(n->m_val.tag, next_arena); + if(in_arena(n->m_val.anchor)) + n->m_val.anchor = _relocated(n->m_val.anchor, next_arena); + } + for(TagDirective &C4_RESTRICT td : m_tag_directives) + { + if(in_arena(td.prefix)) + td.prefix = _relocated(td.prefix, next_arena); + if(in_arena(td.handle)) + 
td.handle = _relocated(td.handle, next_arena); + } +} + + +//----------------------------------------------------------------------------- +void Tree::reserve(id_type cap) +{ + if(cap > m_cap) + { + NodeData *buf = _RYML_CB_ALLOC_HINT(m_callbacks, NodeData, (size_t)cap, m_buf); + if(m_buf) + { + memcpy(buf, m_buf, (size_t)m_cap * sizeof(NodeData)); + _RYML_CB_FREE(m_callbacks, m_buf, NodeData, (size_t)m_cap); + } + id_type first = m_cap, del = cap - m_cap; + m_cap = cap; + m_buf = buf; + _clear_range(first, del); + if(m_free_head != NONE) + { + _RYML_CB_ASSERT(m_callbacks, m_buf != nullptr); + _RYML_CB_ASSERT(m_callbacks, m_free_tail != NONE); + m_buf[m_free_tail].m_next_sibling = first; + m_buf[first].m_prev_sibling = m_free_tail; + m_free_tail = cap-1; + } + else + { + _RYML_CB_ASSERT(m_callbacks, m_free_tail == NONE); + m_free_head = first; + m_free_tail = cap-1; + } + _RYML_CB_ASSERT(m_callbacks, m_free_head == NONE || (m_free_head >= 0 && m_free_head < cap)); + _RYML_CB_ASSERT(m_callbacks, m_free_tail == NONE || (m_free_tail >= 0 && m_free_tail < cap)); + + if( ! 
m_size) + _claim_root(); + } +} + + +//----------------------------------------------------------------------------- +void Tree::clear() +{ + _clear_range(0, m_cap); + m_size = 0; + if(m_buf) + { + _RYML_CB_ASSERT(m_callbacks, m_cap >= 0); + m_free_head = 0; + m_free_tail = m_cap-1; + _claim_root(); + } + else + { + m_free_head = NONE; + m_free_tail = NONE; + } + for(id_type i = 0; i < RYML_MAX_TAG_DIRECTIVES; ++i) + m_tag_directives[i] = {}; +} + +void Tree::_claim_root() +{ + id_type r = _claim(); + _RYML_CB_ASSERT(m_callbacks, r == 0); + _set_hierarchy(r, NONE, NONE); +} + + +//----------------------------------------------------------------------------- +void Tree::_clear_range(id_type first, id_type num) +{ + if(num == 0) + return; // prevent overflow when subtracting + _RYML_CB_ASSERT(m_callbacks, first >= 0 && first + num <= m_cap); + memset(m_buf + first, 0, (size_t)num * sizeof(NodeData)); // TODO we should not need this + for(id_type i = first, e = first + num; i < e; ++i) + { + _clear(i); + NodeData *n = m_buf + i; + n->m_prev_sibling = i - 1; + n->m_next_sibling = i + 1; + } + m_buf[first + num - 1].m_next_sibling = NONE; +} + +C4_SUPPRESS_WARNING_GCC_POP + + +//----------------------------------------------------------------------------- +void Tree::_release(id_type i) +{ + _RYML_CB_ASSERT(m_callbacks, i >= 0 && i < m_cap); + + _rem_hierarchy(i); + _free_list_add(i); + _clear(i); + + --m_size; +} + +//----------------------------------------------------------------------------- +// add to the front of the free list +void Tree::_free_list_add(id_type i) +{ + _RYML_CB_ASSERT(m_callbacks, i >= 0 && i < m_cap); + NodeData &C4_RESTRICT w = m_buf[i]; + + w.m_parent = NONE; + w.m_next_sibling = m_free_head; + w.m_prev_sibling = NONE; + if(m_free_head != NONE) + m_buf[m_free_head].m_prev_sibling = i; + m_free_head = i; + if(m_free_tail == NONE) + m_free_tail = m_free_head; +} + +void Tree::_free_list_rem(id_type i) +{ + if(m_free_head == i) + m_free_head = 
_p(i)->m_next_sibling; + _rem_hierarchy(i); +} + +//----------------------------------------------------------------------------- +id_type Tree::_claim() +{ + if(m_free_head == NONE || m_buf == nullptr) + { + id_type sz = 2 * m_cap; + sz = sz ? sz : 16; + reserve(sz); + _RYML_CB_ASSERT(m_callbacks, m_free_head != NONE); + } + + _RYML_CB_ASSERT(m_callbacks, m_size < m_cap); + _RYML_CB_ASSERT(m_callbacks, m_free_head >= 0 && m_free_head < m_cap); + + id_type ichild = m_free_head; + NodeData *child = m_buf + ichild; + + ++m_size; + m_free_head = child->m_next_sibling; + if(m_free_head == NONE) + { + m_free_tail = NONE; + _RYML_CB_ASSERT(m_callbacks, m_size == m_cap); + } + + _clear(ichild); + + return ichild; +} + +//----------------------------------------------------------------------------- + +C4_SUPPRESS_WARNING_GCC_PUSH +C4_SUPPRESS_WARNING_CLANG_PUSH +C4_SUPPRESS_WARNING_CLANG("-Wnull-dereference") +#if defined(__GNUC__) +#if (__GNUC__ >= 6) +C4_SUPPRESS_WARNING_GCC("-Wnull-dereference") +#endif +#if (__GNUC__ > 9) +C4_SUPPRESS_WARNING_GCC("-Wanalyzer-fd-leak") +#endif +#endif + +void Tree::_set_hierarchy(id_type ichild, id_type iparent, id_type iprev_sibling) +{ + _RYML_CB_ASSERT(m_callbacks, ichild >= 0 && ichild < m_cap); + _RYML_CB_ASSERT(m_callbacks, iparent == NONE || (iparent >= 0 && iparent < m_cap)); + _RYML_CB_ASSERT(m_callbacks, iprev_sibling == NONE || (iprev_sibling >= 0 && iprev_sibling < m_cap)); + + NodeData *C4_RESTRICT child = _p(ichild); + + child->m_parent = iparent; + child->m_prev_sibling = NONE; + child->m_next_sibling = NONE; + + if(iparent == NONE) + { + _RYML_CB_ASSERT(m_callbacks, ichild == 0); + _RYML_CB_ASSERT(m_callbacks, iprev_sibling == NONE); + } + + if(iparent == NONE) + return; + + id_type inext_sibling = iprev_sibling != NONE ? 
next_sibling(iprev_sibling) : first_child(iparent); + NodeData *C4_RESTRICT parent = get(iparent); + NodeData *C4_RESTRICT psib = get(iprev_sibling); + NodeData *C4_RESTRICT nsib = get(inext_sibling); + + if(psib) + { + _RYML_CB_ASSERT(m_callbacks, next_sibling(iprev_sibling) == id(nsib)); + child->m_prev_sibling = id(psib); + psib->m_next_sibling = id(child); + _RYML_CB_ASSERT(m_callbacks, psib->m_prev_sibling != psib->m_next_sibling || psib->m_prev_sibling == NONE); + } + + if(nsib) + { + _RYML_CB_ASSERT(m_callbacks, prev_sibling(inext_sibling) == id(psib)); + child->m_next_sibling = id(nsib); + nsib->m_prev_sibling = id(child); + _RYML_CB_ASSERT(m_callbacks, nsib->m_prev_sibling != nsib->m_next_sibling || nsib->m_prev_sibling == NONE); + } + + if(parent->m_first_child == NONE) + { + _RYML_CB_ASSERT(m_callbacks, parent->m_last_child == NONE); + parent->m_first_child = id(child); + parent->m_last_child = id(child); + } + else + { + if(child->m_next_sibling == parent->m_first_child) + parent->m_first_child = id(child); + + if(child->m_prev_sibling == parent->m_last_child) + parent->m_last_child = id(child); + } +} + +C4_SUPPRESS_WARNING_GCC_POP +C4_SUPPRESS_WARNING_CLANG_POP + + +//----------------------------------------------------------------------------- +void Tree::_rem_hierarchy(id_type i) +{ + _RYML_CB_ASSERT(m_callbacks, i >= 0 && i < m_cap); + + NodeData &C4_RESTRICT w = m_buf[i]; + + // remove from the parent + if(w.m_parent != NONE) + { + NodeData &C4_RESTRICT p = m_buf[w.m_parent]; + if(p.m_first_child == i) + { + p.m_first_child = w.m_next_sibling; + } + if(p.m_last_child == i) + { + p.m_last_child = w.m_prev_sibling; + } + } + + // remove from the used list + if(w.m_prev_sibling != NONE) + { + NodeData *C4_RESTRICT prev = get(w.m_prev_sibling); + prev->m_next_sibling = w.m_next_sibling; + } + if(w.m_next_sibling != NONE) + { + NodeData *C4_RESTRICT next = get(w.m_next_sibling); + next->m_prev_sibling = w.m_prev_sibling; + } +} + 
+//----------------------------------------------------------------------------- +id_type Tree::_do_reorder(id_type *node, id_type count) +{ + // swap this node if it's not in place + if(*node != count) + { + _swap(*node, count); + *node = count; + } + ++count; // bump the count from this node + + // now descend in the hierarchy + for(id_type i = first_child(*node); i != NONE; i = next_sibling(i)) + { + // this child may have been relocated to a different index, + // so get an updated version + count = _do_reorder(&i, count); + } + return count; +} + +void Tree::reorder() +{ + id_type r = root_id(); + _do_reorder(&r, 0); +} + + +//----------------------------------------------------------------------------- +void Tree::_swap(id_type n_, id_type m_) +{ + _RYML_CB_ASSERT(m_callbacks, (parent(n_) != NONE) || type(n_) == NOTYPE); + _RYML_CB_ASSERT(m_callbacks, (parent(m_) != NONE) || type(m_) == NOTYPE); + NodeType tn = type(n_); + NodeType tm = type(m_); + if(tn != NOTYPE && tm != NOTYPE) + { + _swap_props(n_, m_); + _swap_hierarchy(n_, m_); + } + else if(tn == NOTYPE && tm != NOTYPE) + { + _copy_props(n_, m_); + _free_list_rem(n_); + _copy_hierarchy(n_, m_); + _clear(m_); + _free_list_add(m_); + } + else if(tn != NOTYPE && tm == NOTYPE) + { + _copy_props(m_, n_); + _free_list_rem(m_); + _copy_hierarchy(m_, n_); + _clear(n_); + _free_list_add(n_); + } + else + { + C4_NEVER_REACH(); + } +} + +//----------------------------------------------------------------------------- +void Tree::_swap_hierarchy(id_type ia, id_type ib) +{ + if(ia == ib) return; + + for(id_type i = first_child(ia); i != NONE; i = next_sibling(i)) + { + if(i == ib || i == ia) + continue; + _p(i)->m_parent = ib; + } + + for(id_type i = first_child(ib); i != NONE; i = next_sibling(i)) + { + if(i == ib || i == ia) + continue; + _p(i)->m_parent = ia; + } + + auto & C4_RESTRICT a = *_p(ia); + auto & C4_RESTRICT b = *_p(ib); + auto & C4_RESTRICT pa = *_p(a.m_parent); + auto & C4_RESTRICT pb = 
*_p(b.m_parent); + + if(&pa == &pb) + { + if((pa.m_first_child == ib && pa.m_last_child == ia) + || + (pa.m_first_child == ia && pa.m_last_child == ib)) + { + std::swap(pa.m_first_child, pa.m_last_child); + } + else + { + bool changed = false; + if(pa.m_first_child == ia) + { + pa.m_first_child = ib; + changed = true; + } + if(pa.m_last_child == ia) + { + pa.m_last_child = ib; + changed = true; + } + if(pb.m_first_child == ib && !changed) + { + pb.m_first_child = ia; + } + if(pb.m_last_child == ib && !changed) + { + pb.m_last_child = ia; + } + } + } + else + { + if(pa.m_first_child == ia) + pa.m_first_child = ib; + if(pa.m_last_child == ia) + pa.m_last_child = ib; if(pb.m_first_child == ib) pb.m_first_child = ia; if(pb.m_last_child == ib) pb.m_last_child = ia; } - std::swap(a.m_first_child , b.m_first_child); - std::swap(a.m_last_child , b.m_last_child); + std::swap(a.m_first_child , b.m_first_child); + std::swap(a.m_last_child , b.m_last_child); + + if(a.m_prev_sibling != ib && b.m_prev_sibling != ia && + a.m_next_sibling != ib && b.m_next_sibling != ia) + { + if(a.m_prev_sibling != NONE && a.m_prev_sibling != ib) + _p(a.m_prev_sibling)->m_next_sibling = ib; + if(a.m_next_sibling != NONE && a.m_next_sibling != ib) + _p(a.m_next_sibling)->m_prev_sibling = ib; + if(b.m_prev_sibling != NONE && b.m_prev_sibling != ia) + _p(b.m_prev_sibling)->m_next_sibling = ia; + if(b.m_next_sibling != NONE && b.m_next_sibling != ia) + _p(b.m_next_sibling)->m_prev_sibling = ia; + std::swap(a.m_prev_sibling, b.m_prev_sibling); + std::swap(a.m_next_sibling, b.m_next_sibling); + } + else + { + if(a.m_next_sibling == ib) // n will go after m + { + _RYML_CB_ASSERT(m_callbacks, b.m_prev_sibling == ia); + if(a.m_prev_sibling != NONE) + { + _RYML_CB_ASSERT(m_callbacks, a.m_prev_sibling != ib); + _p(a.m_prev_sibling)->m_next_sibling = ib; + } + if(b.m_next_sibling != NONE) + { + _RYML_CB_ASSERT(m_callbacks, b.m_next_sibling != ia); + _p(b.m_next_sibling)->m_prev_sibling = ia; + } + id_type ns 
= b.m_next_sibling; + b.m_prev_sibling = a.m_prev_sibling; + b.m_next_sibling = ia; + a.m_prev_sibling = ib; + a.m_next_sibling = ns; + } + else if(a.m_prev_sibling == ib) // m will go after n + { + _RYML_CB_ASSERT(m_callbacks, b.m_next_sibling == ia); + if(b.m_prev_sibling != NONE) + { + _RYML_CB_ASSERT(m_callbacks, b.m_prev_sibling != ia); + _p(b.m_prev_sibling)->m_next_sibling = ia; + } + if(a.m_next_sibling != NONE) + { + _RYML_CB_ASSERT(m_callbacks, a.m_next_sibling != ib); + _p(a.m_next_sibling)->m_prev_sibling = ib; + } + id_type ns = b.m_prev_sibling; + a.m_prev_sibling = b.m_prev_sibling; + a.m_next_sibling = ib; + b.m_prev_sibling = ia; + b.m_next_sibling = ns; + } + else + { + C4_NEVER_REACH(); + } + } + _RYML_CB_ASSERT(m_callbacks, a.m_next_sibling != ia); + _RYML_CB_ASSERT(m_callbacks, a.m_prev_sibling != ia); + _RYML_CB_ASSERT(m_callbacks, b.m_next_sibling != ib); + _RYML_CB_ASSERT(m_callbacks, b.m_prev_sibling != ib); + + if(a.m_parent != ib && b.m_parent != ia) + { + std::swap(a.m_parent, b.m_parent); + } + else + { + if(a.m_parent == ib && b.m_parent != ia) + { + a.m_parent = b.m_parent; + b.m_parent = ia; + } + else if(a.m_parent != ib && b.m_parent == ia) + { + b.m_parent = a.m_parent; + a.m_parent = ib; + } + else + { + C4_NEVER_REACH(); + } + } +} + +//----------------------------------------------------------------------------- +void Tree::_copy_hierarchy(id_type dst_, id_type src_) +{ + auto const& C4_RESTRICT src = *_p(src_); + auto & C4_RESTRICT dst = *_p(dst_); + auto & C4_RESTRICT prt = *_p(src.m_parent); + for(id_type i = src.m_first_child; i != NONE; i = next_sibling(i)) + { + _p(i)->m_parent = dst_; + } + if(src.m_prev_sibling != NONE) + { + _p(src.m_prev_sibling)->m_next_sibling = dst_; + } + if(src.m_next_sibling != NONE) + { + _p(src.m_next_sibling)->m_prev_sibling = dst_; + } + if(prt.m_first_child == src_) + { + prt.m_first_child = dst_; + } + if(prt.m_last_child == src_) + { + prt.m_last_child = dst_; + } + dst.m_parent = 
src.m_parent; + dst.m_first_child = src.m_first_child; + dst.m_last_child = src.m_last_child; + dst.m_prev_sibling = src.m_prev_sibling; + dst.m_next_sibling = src.m_next_sibling; +} + +//----------------------------------------------------------------------------- +void Tree::_swap_props(id_type n_, id_type m_) +{ + NodeData &C4_RESTRICT n = *_p(n_); + NodeData &C4_RESTRICT m = *_p(m_); + std::swap(n.m_type, m.m_type); + std::swap(n.m_key, m.m_key); + std::swap(n.m_val, m.m_val); +} + +//----------------------------------------------------------------------------- +void Tree::move(id_type node, id_type after) +{ + _RYML_CB_ASSERT(m_callbacks, node != NONE); + _RYML_CB_ASSERT(m_callbacks, node != after); + _RYML_CB_ASSERT(m_callbacks, ! is_root(node)); + _RYML_CB_ASSERT(m_callbacks, (after == NONE) || (has_sibling(node, after) && has_sibling(after, node))); + + _rem_hierarchy(node); + _set_hierarchy(node, parent(node), after); +} + +//----------------------------------------------------------------------------- + +void Tree::move(id_type node, id_type new_parent, id_type after) +{ + _RYML_CB_ASSERT(m_callbacks, node != NONE); + _RYML_CB_ASSERT(m_callbacks, node != after); + _RYML_CB_ASSERT(m_callbacks, new_parent != NONE); + _RYML_CB_ASSERT(m_callbacks, new_parent != node); + _RYML_CB_ASSERT(m_callbacks, new_parent != after); + _RYML_CB_ASSERT(m_callbacks, ! 
is_root(node)); + + _rem_hierarchy(node); + _set_hierarchy(node, new_parent, after); +} + +id_type Tree::move(Tree *src, id_type node, id_type new_parent, id_type after) +{ + _RYML_CB_ASSERT(m_callbacks, src != nullptr); + _RYML_CB_ASSERT(m_callbacks, node != NONE); + _RYML_CB_ASSERT(m_callbacks, new_parent != NONE); + _RYML_CB_ASSERT(m_callbacks, new_parent != after); + + id_type dup = duplicate(src, node, new_parent, after); + src->remove(node); + return dup; +} + +void Tree::set_root_as_stream() +{ + id_type root = root_id(); + if(is_stream(root)) + return; + // don't use _add_flags() because it's checked and will fail + if(!has_children(root)) + { + if(is_val(root)) + { + _p(root)->m_type.add(SEQ); + id_type next_doc = append_child(root); + _copy_props_wo_key(next_doc, root); + _p(next_doc)->m_type.add(DOC); + _p(next_doc)->m_type.rem(SEQ); + } + _p(root)->m_type = STREAM; + return; + } + _RYML_CB_ASSERT(m_callbacks, !has_key(root)); + id_type next_doc = append_child(root); + _copy_props_wo_key(next_doc, root); + _add_flags(next_doc, DOC); + for(id_type prev = NONE, ch = first_child(root), next = next_sibling(ch); ch != NONE; ) + { + if(ch == next_doc) + break; + move(ch, next_doc, prev); + prev = ch; + ch = next; + next = next_sibling(next); + } + _p(root)->m_type = STREAM; +} + + +//----------------------------------------------------------------------------- +void Tree::remove_children(id_type node) +{ + _RYML_CB_ASSERT(m_callbacks, get(node) != nullptr); + id_type ich = get(node)->m_first_child; + while(ich != NONE) + { + remove_children(ich); + _RYML_CB_ASSERT(m_callbacks, get(ich) != nullptr); + id_type next = get(ich)->m_next_sibling; + _release(ich); + if(ich == get(node)->m_last_child) + break; + ich = next; + } +} + +bool Tree::change_type(id_type node, NodeType type) +{ + _RYML_CB_ASSERT(m_callbacks, type.is_val() || type.is_map() || type.is_seq()); + _RYML_CB_ASSERT(m_callbacks, type.is_val() + type.is_map() + type.is_seq() == 1); + 
_RYML_CB_ASSERT(m_callbacks, type.has_key() == has_key(node) || (has_key(node) && !type.has_key())); + NodeData *d = _p(node); + if(type.is_map() && is_map(node)) + return false; + else if(type.is_seq() && is_seq(node)) + return false; + else if(type.is_val() && is_val(node)) + return false; + d->m_type = (d->m_type & (~(MAP|SEQ|VAL))) | type; + remove_children(node); + return true; +} + + +//----------------------------------------------------------------------------- +id_type Tree::duplicate(id_type node, id_type parent, id_type after) +{ + return duplicate(this, node, parent, after); +} + +id_type Tree::duplicate(Tree const* src, id_type node, id_type parent, id_type after) +{ + _RYML_CB_ASSERT(m_callbacks, src != nullptr); + _RYML_CB_ASSERT(m_callbacks, node != NONE); + _RYML_CB_ASSERT(m_callbacks, parent != NONE); + _RYML_CB_ASSERT(m_callbacks, ! src->is_root(node)); + + id_type copy = _claim(); + + _copy_props(copy, src, node); + _set_hierarchy(copy, parent, after); + duplicate_children(src, node, copy, NONE); + + return copy; +} + +//----------------------------------------------------------------------------- +id_type Tree::duplicate_children(id_type node, id_type parent, id_type after) +{ + return duplicate_children(this, node, parent, after); +} + +id_type Tree::duplicate_children(Tree const* src, id_type node, id_type parent, id_type after) +{ + _RYML_CB_ASSERT(m_callbacks, src != nullptr); + _RYML_CB_ASSERT(m_callbacks, node != NONE); + _RYML_CB_ASSERT(m_callbacks, parent != NONE); + _RYML_CB_ASSERT(m_callbacks, after == NONE || has_child(parent, after)); + + id_type prev = after; + for(id_type i = src->first_child(node); i != NONE; i = src->next_sibling(i)) + { + prev = duplicate(src, i, parent, prev); + } + + return prev; +} + +//----------------------------------------------------------------------------- +void Tree::duplicate_contents(id_type node, id_type where) +{ + duplicate_contents(this, node, where); +} + +void Tree::duplicate_contents(Tree 
const *src, id_type node, id_type where) +{ + _RYML_CB_ASSERT(m_callbacks, src != nullptr); + _RYML_CB_ASSERT(m_callbacks, node != NONE); + _RYML_CB_ASSERT(m_callbacks, where != NONE); + _copy_props_wo_key(where, src, node); + duplicate_children(src, node, where, last_child(where)); +} + +//----------------------------------------------------------------------------- +id_type Tree::duplicate_children_no_rep(id_type node, id_type parent, id_type after) +{ + return duplicate_children_no_rep(this, node, parent, after); +} + +id_type Tree::duplicate_children_no_rep(Tree const *src, id_type node, id_type parent, id_type after) +{ + _RYML_CB_ASSERT(m_callbacks, node != NONE); + _RYML_CB_ASSERT(m_callbacks, parent != NONE); + _RYML_CB_ASSERT(m_callbacks, after == NONE || has_child(parent, after)); + + // don't loop using pointers as there may be a relocation + + // find the position where "after" is + id_type after_pos = NONE; + if(after != NONE) + { + for(id_type i = first_child(parent), icount = 0; i != NONE; ++icount, i = next_sibling(i)) + { + if(i == after) + { + after_pos = icount; + break; + } + } + _RYML_CB_ASSERT(m_callbacks, after_pos != NONE); + } + + // for each child to be duplicated... + id_type prev = after; + for(id_type i = src->first_child(node); i != NONE; i = src->next_sibling(i)) + { + if(is_seq(parent)) + { + prev = duplicate(i, parent, prev); + } + else + { + _RYML_CB_ASSERT(m_callbacks, is_map(parent)); + // does the parent already have a node with key equal to that of the current duplicate? 
+ id_type rep = NONE, rep_pos = NONE; + for(id_type j = first_child(parent), jcount = 0; j != NONE; ++jcount, j = next_sibling(j)) + { + if(key(j) == key(i)) + { + rep = j; + rep_pos = jcount; + break; + } + } + if(rep == NONE) // there is no repetition; just duplicate + { + prev = duplicate(src, i, parent, prev); + } + else // yes, there is a repetition + { + if(after_pos != NONE && rep_pos < after_pos) + { + // rep is located before the node which will be inserted, + // and will be overridden by the duplicate. So replace it. + remove(rep); + prev = duplicate(src, i, parent, prev); + } + else if(prev == NONE) + { + // first iteration with prev = after = NONE and repetition + prev = rep; + } + else if(rep != prev) + { + // rep is located after the node which will be inserted + // and overrides it. So move the rep into this node's place. + move(rep, prev); + prev = rep; + } + } // there's a repetition + } + } + + return prev; +} + + +//----------------------------------------------------------------------------- + +void Tree::merge_with(Tree const *src, id_type src_node, id_type dst_node) +{ + _RYML_CB_ASSERT(m_callbacks, src != nullptr); + if(src_node == NONE) + src_node = src->root_id(); + if(dst_node == NONE) + dst_node = root_id(); + _RYML_CB_ASSERT(m_callbacks, src->has_val(src_node) || src->is_seq(src_node) || src->is_map(src_node)); + if(src->has_val(src_node)) + { + type_bits mask_src = ~STYLE; // keep the existing style if it is already a val + if( ! has_val(dst_node)) + { + if(has_children(dst_node)) + remove_children(dst_node); + mask_src |= VAL_STYLE; // copy the src style + } + if(src->is_keyval(src_node)) + { + _copy_props(dst_node, src, src_node, mask_src); + } + else + { + _RYML_CB_ASSERT(m_callbacks, src->is_val(src_node)); + _copy_props_wo_key(dst_node, src, src_node, mask_src); + } + } + else if(src->is_seq(src_node)) + { + if( ! 
is_seq(dst_node)) + { + if(has_children(dst_node)) + remove_children(dst_node); + _clear_type(dst_node); + if(src->has_key(src_node)) + to_seq(dst_node, src->key(src_node)); + else + to_seq(dst_node); + _p(dst_node)->m_type = src->_p(src_node)->m_type; + } + for(id_type sch = src->first_child(src_node); sch != NONE; sch = src->next_sibling(sch)) + { + id_type dch = append_child(dst_node); + _copy_props_wo_key(dch, src, sch); + merge_with(src, sch, dch); + } + } + else + { + _RYML_CB_ASSERT(m_callbacks, src->is_map(src_node)); + if( ! is_map(dst_node)) + { + if(has_children(dst_node)) + remove_children(dst_node); + _clear_type(dst_node); + if(src->has_key(src_node)) + to_map(dst_node, src->key(src_node)); + else + to_map(dst_node); + _p(dst_node)->m_type = src->_p(src_node)->m_type; + } + for(id_type sch = src->first_child(src_node); sch != NONE; sch = src->next_sibling(sch)) + { + id_type dch = find_child(dst_node, src->key(sch)); + if(dch == NONE) + { + dch = append_child(dst_node); + _copy_props(dch, src, sch); + } + merge_with(src, sch, dch); + } + } +} + + +//----------------------------------------------------------------------------- + +void Tree::resolve() +{ + if(m_size == 0) + return; + ReferenceResolver rr; + resolve(&rr); +} + +void Tree::resolve(ReferenceResolver *C4_RESTRICT rr) +{ + if(m_size == 0) + return; + rr->resolve(this); +} + + +//----------------------------------------------------------------------------- + +id_type Tree::num_children(id_type node) const +{ + id_type count = 0; + for(id_type i = first_child(node); i != NONE; i = next_sibling(i)) + ++count; + return count; +} + +id_type Tree::child(id_type node, id_type pos) const +{ + _RYML_CB_ASSERT(m_callbacks, node != NONE); + id_type count = 0; + for(id_type i = first_child(node); i != NONE; i = next_sibling(i)) + { + if(count++ == pos) + return i; + } + return NONE; +} + +id_type Tree::child_pos(id_type node, id_type ch) const +{ + _RYML_CB_ASSERT(m_callbacks, node != NONE); + id_type 
count = 0; + for(id_type i = first_child(node); i != NONE; i = next_sibling(i)) + { + if(i == ch) + return count; + ++count; + } + return NONE; +} + +#if defined(__clang__) +# pragma clang diagnostic push +# pragma GCC diagnostic ignored "-Wnull-dereference" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# if __GNUC__ >= 6 +# pragma GCC diagnostic ignored "-Wnull-dereference" +# endif +# if __GNUC__ > 9 +# pragma GCC diagnostic ignored "-Wanalyzer-null-dereference" +# endif +#endif + +id_type Tree::find_child(id_type node, csubstr const& name) const +{ + _RYML_CB_ASSERT(m_callbacks, node != NONE); + _RYML_CB_ASSERT(m_callbacks, is_map(node)); + if(get(node)->m_first_child == NONE) + { + _RYML_CB_ASSERT(m_callbacks, _p(node)->m_last_child == NONE); + return NONE; + } + else + { + _RYML_CB_ASSERT(m_callbacks, _p(node)->m_last_child != NONE); + } + for(id_type i = first_child(node); i != NONE; i = next_sibling(i)) + { + if(_p(i)->m_key.scalar == name) + { + return i; + } + } + return NONE; +} + +#if defined(__clang__) +# pragma clang diagnostic pop +#elif defined(__GNUC__) +# pragma GCC diagnostic pop +#endif + +namespace { +id_type depth_desc_(Tree const& C4_RESTRICT t, id_type id, id_type currdepth=0, id_type maxdepth=0) +{ + maxdepth = currdepth > maxdepth ? currdepth : maxdepth; + for(id_type child = t.first_child(id); child != NONE; child = t.next_sibling(child)) + { + const id_type d = depth_desc_(t, child, currdepth+1, maxdepth); + maxdepth = d > maxdepth ? 
d : maxdepth; + } + return maxdepth; +} +} + +id_type Tree::depth_desc(id_type node) const +{ + _RYML_CB_ASSERT(m_callbacks, node != NONE); + return depth_desc_(*this, node); +} + +id_type Tree::depth_asc(id_type node) const +{ + _RYML_CB_ASSERT(m_callbacks, node != NONE); + id_type depth = 0; + while(!is_root(node)) + { + ++depth; + node = parent(node); + } + return depth; +} + + +//----------------------------------------------------------------------------- + +void Tree::to_val(id_type node, csubstr val, type_bits more_flags) +{ + _RYML_CB_ASSERT(m_callbacks, ! has_children(node)); + _RYML_CB_ASSERT(m_callbacks, parent(node) == NONE || ! parent_is_map(node)); + _set_flags(node, VAL|more_flags); + _p(node)->m_key.clear(); + _p(node)->m_val = val; +} + +void Tree::to_keyval(id_type node, csubstr key, csubstr val, type_bits more_flags) +{ + _RYML_CB_ASSERT(m_callbacks, ! has_children(node)); + _RYML_CB_ASSERT(m_callbacks, parent(node) == NONE || parent_is_map(node)); + _set_flags(node, KEYVAL|more_flags); + _p(node)->m_key = key; + _p(node)->m_val = val; +} + +void Tree::to_map(id_type node, type_bits more_flags) +{ + _RYML_CB_ASSERT(m_callbacks, ! has_children(node)); + _RYML_CB_ASSERT(m_callbacks, parent(node) == NONE || ! parent_is_map(node)); // parent must not have children with keys + _set_flags(node, MAP|more_flags); + _p(node)->m_key.clear(); + _p(node)->m_val.clear(); +} + +void Tree::to_map(id_type node, csubstr key, type_bits more_flags) +{ + _RYML_CB_ASSERT(m_callbacks, ! has_children(node)); + _RYML_CB_ASSERT(m_callbacks, parent(node) == NONE || parent_is_map(node)); + _set_flags(node, KEY|MAP|more_flags); + _p(node)->m_key = key; + _p(node)->m_val.clear(); +} + +void Tree::to_seq(id_type node, type_bits more_flags) +{ + _RYML_CB_ASSERT(m_callbacks, ! 
has_children(node)); + _RYML_CB_ASSERT(m_callbacks, parent(node) == NONE || parent_is_seq(node)); + _set_flags(node, SEQ|more_flags); + _p(node)->m_key.clear(); + _p(node)->m_val.clear(); +} + +void Tree::to_seq(id_type node, csubstr key, type_bits more_flags) +{ + _RYML_CB_ASSERT(m_callbacks, ! has_children(node)); + _RYML_CB_ASSERT(m_callbacks, parent(node) == NONE || parent_is_map(node)); + _set_flags(node, KEY|SEQ|more_flags); + _p(node)->m_key = key; + _p(node)->m_val.clear(); +} + +void Tree::to_doc(id_type node, type_bits more_flags) +{ + _RYML_CB_ASSERT(m_callbacks, ! has_children(node)); + _set_flags(node, DOC|more_flags); + _p(node)->m_key.clear(); + _p(node)->m_val.clear(); +} + +void Tree::to_stream(id_type node, type_bits more_flags) +{ + _RYML_CB_ASSERT(m_callbacks, ! has_children(node)); + _set_flags(node, STREAM|more_flags); + _p(node)->m_key.clear(); + _p(node)->m_val.clear(); +} + + +//----------------------------------------------------------------------------- +id_type Tree::num_tag_directives() const +{ + // this assumes we have a very small number of tag directives + for(id_type i = 0; i < RYML_MAX_TAG_DIRECTIVES; ++i) + if(m_tag_directives[i].handle.empty()) + return i; + return RYML_MAX_TAG_DIRECTIVES; +} + +void Tree::clear_tag_directives() +{ + for(TagDirective &td : m_tag_directives) + td = {}; +} + +id_type Tree::add_tag_directive(TagDirective const& td) +{ + _RYML_CB_CHECK(m_callbacks, !td.handle.empty()); + _RYML_CB_CHECK(m_callbacks, !td.prefix.empty()); + _RYML_CB_CHECK(m_callbacks, td.handle.begins_with('!')); + _RYML_CB_CHECK(m_callbacks, td.handle.ends_with('!')); + // https://yaml.org/spec/1.2.2/#rule-ns-word-char + _RYML_CB_CHECK(m_callbacks, td.handle == '!' || td.handle == "!!" 
|| td.handle.trim('!').first_not_of("01234567890abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-") == npos); + id_type pos = num_tag_directives(); + _RYML_CB_CHECK(m_callbacks, pos < RYML_MAX_TAG_DIRECTIVES); + m_tag_directives[pos] = td; + return pos; +} + +bool Tree::add_tag_directive(csubstr directive_) +{ + TagDirective td; + if(td.create_from_str(directive_, this)) + { + add_tag_directive(td); + return true; + } + return false; +} + +size_t Tree::resolve_tag(substr output, csubstr tag, id_type node_id) const +{ + // lookup from the end. We want to find the first directive that + // matches the tag and has a target node id leq than the given + // node_id. + for(id_type i = RYML_MAX_TAG_DIRECTIVES-1; i != (id_type)-1; --i) + { + auto const& td = m_tag_directives[i]; + if(td.handle.empty()) + continue; + if(tag.begins_with(td.handle) && td.next_node_id <= node_id) + return td.transform(tag, output, m_callbacks); + } + if(tag.begins_with('!')) + { + if(is_custom_tag(tag)) + { + _RYML_CB_ERR(m_callbacks, "tag directive not found"); + } + } + return 0; // return 0 to signal that the tag is local and cannot be resolved +} + +namespace { +csubstr _transform_tag(Tree *t, csubstr tag, id_type node) +{ + _c4dbgpf("[{}] resolving tag ~~~{}~~~", node, tag); + size_t required_size = t->resolve_tag(substr{}, tag, node); + if(!required_size) + { + if(tag.begins_with("!<")) + tag = tag.sub(1); + _c4dbgpf("[{}] resolved tag: ~~~{}~~~", node, tag); + return tag; + } + const char *prev_arena = t->arena().str;(void)prev_arena; + substr buf = t->alloc_arena(required_size); + _RYML_CB_ASSERT(t->m_callbacks, t->arena().str == prev_arena); + size_t actual_size = t->resolve_tag(buf, tag, node); + _RYML_CB_ASSERT(t->m_callbacks, actual_size <= required_size); + _c4dbgpf("[{}] resolved tag: ~~~{}~~~", node, buf.first(actual_size)); + return buf.first(actual_size); +} +void _resolve_tags(Tree *t, id_type node) +{ + NodeData *C4_RESTRICT d = t->_p(node); + if(d->m_type & KEYTAG) + 
d->m_key.tag = _transform_tag(t, d->m_key.tag, node); + if(d->m_type & VALTAG) + d->m_val.tag = _transform_tag(t, d->m_val.tag, node); + for(id_type child = t->first_child(node); child != NONE; child = t->next_sibling(child)) + _resolve_tags(t, child); +} +size_t _count_resolved_tags_size(Tree const* t, id_type node) +{ + size_t sz = 0; + NodeData const* C4_RESTRICT d = t->_p(node); + if(d->m_type & KEYTAG) + sz += t->resolve_tag(substr{}, d->m_key.tag, node); + if(d->m_type & VALTAG) + sz += t->resolve_tag(substr{}, d->m_val.tag, node); + for(id_type child = t->first_child(node); child != NONE; child = t->next_sibling(child)) + sz += _count_resolved_tags_size(t, child); + return sz; +} +void _normalize_tags(Tree *t, id_type node) +{ + NodeData *C4_RESTRICT d = t->_p(node); + if(d->m_type & KEYTAG) + d->m_key.tag = normalize_tag(d->m_key.tag); + if(d->m_type & VALTAG) + d->m_val.tag = normalize_tag(d->m_val.tag); + for(id_type child = t->first_child(node); child != NONE; child = t->next_sibling(child)) + _normalize_tags(t, child); +} +void _normalize_tags_long(Tree *t, id_type node) +{ + NodeData *C4_RESTRICT d = t->_p(node); + if(d->m_type & KEYTAG) + d->m_key.tag = normalize_tag_long(d->m_key.tag); + if(d->m_type & VALTAG) + d->m_val.tag = normalize_tag_long(d->m_val.tag); + for(id_type child = t->first_child(node); child != NONE; child = t->next_sibling(child)) + _normalize_tags_long(t, child); +} +} // namespace + +void Tree::resolve_tags() +{ + if(empty()) + return; + size_t needed_size = _count_resolved_tags_size(this, root_id()); + if(needed_size) + reserve_arena(arena_size() + needed_size); + _resolve_tags(this, root_id()); +} + +void Tree::normalize_tags() +{ + if(empty()) + return; + _normalize_tags(this, root_id()); +} + +void Tree::normalize_tags_long() +{ + if(empty()) + return; + _normalize_tags_long(this, root_id()); +} + + +//----------------------------------------------------------------------------- + +csubstr Tree::lookup_result::resolved() 
const +{ + csubstr p = path.first(path_pos); + if(p.ends_with('.')) + p = p.first(p.len-1); + return p; +} + +csubstr Tree::lookup_result::unresolved() const +{ + return path.sub(path_pos); +} + +void Tree::_advance(lookup_result *r, size_t more) const +{ + r->path_pos += more; + if(r->path.sub(r->path_pos).begins_with('.')) + ++r->path_pos; +} + +Tree::lookup_result Tree::lookup_path(csubstr path, id_type start) const +{ + if(start == NONE) + start = root_id(); + lookup_result r(path, start); + if(path.empty()) + return r; + _lookup_path(&r); + if(r.target == NONE && r.closest == start) + r.closest = NONE; + return r; +} + +id_type Tree::lookup_path_or_modify(csubstr default_value, csubstr path, id_type start) +{ + id_type target = _lookup_path_or_create(path, start); + if(parent_is_map(target)) + to_keyval(target, key(target), default_value); + else + to_val(target, default_value); + return target; +} + +id_type Tree::lookup_path_or_modify(Tree const *src, id_type src_node, csubstr path, id_type start) +{ + id_type target = _lookup_path_or_create(path, start); + merge_with(src, src_node, target); + return target; +} + +id_type Tree::_lookup_path_or_create(csubstr path, id_type start) +{ + if(start == NONE) + start = root_id(); + lookup_result r(path, start); + _lookup_path(&r); + if(r.target != NONE) + { + C4_ASSERT(r.unresolved().empty()); + return r.target; + } + _lookup_path_modify(&r); + return r.target; +} + +void Tree::_lookup_path(lookup_result *r) const +{ + C4_ASSERT( ! r->unresolved().empty()); + _lookup_path_token parent{"", type(r->closest)}; + id_type node; + do + { + node = _next_node(r, &parent); + if(node != NONE) + r->closest = node; + if(r->unresolved().empty()) + { + r->target = node; + return; + } + } while(node != NONE); +} + +void Tree::_lookup_path_modify(lookup_result *r) +{ + C4_ASSERT( ! 
r->unresolved().empty()); + _lookup_path_token parent{"", type(r->closest)}; + id_type node; + do + { + node = _next_node_modify(r, &parent); + if(node != NONE) + r->closest = node; + if(r->unresolved().empty()) + { + r->target = node; + return; + } + } while(node != NONE); +} + +id_type Tree::_next_node(lookup_result * r, _lookup_path_token *parent) const +{ + _lookup_path_token token = _next_token(r, *parent); + if( ! token) + return NONE; + + id_type node = NONE; + csubstr prev = token.value; + if(token.type == MAP || token.type == SEQ) + { + _RYML_CB_ASSERT(m_callbacks, !token.value.begins_with('[')); + //_RYML_CB_ASSERT(m_callbacks, is_container(r->closest) || r->closest == NONE); + _RYML_CB_ASSERT(m_callbacks, is_map(r->closest)); + node = find_child(r->closest, token.value); + } + else if(token.type == KEYVAL) + { + _RYML_CB_ASSERT(m_callbacks, r->unresolved().empty()); + if(is_map(r->closest)) + node = find_child(r->closest, token.value); + } + else if(token.type == KEY) + { + _RYML_CB_ASSERT(m_callbacks, token.value.begins_with('[') && token.value.ends_with(']')); + token.value = token.value.offs(1, 1).trim(' '); + id_type idx = 0; + _RYML_CB_CHECK(m_callbacks, from_chars(token.value, &idx)); + node = child(r->closest, idx); + } + else + { + C4_NEVER_REACH(); + } + + if(node != NONE) + { + *parent = token; + } + else + { + csubstr p = r->path.sub(r->path_pos > 0 ? r->path_pos - 1 : r->path_pos); + r->path_pos -= prev.len; + if(p.begins_with('.')) + r->path_pos -= 1u; + } + + return node; +} + +id_type Tree::_next_node_modify(lookup_result * r, _lookup_path_token *parent) +{ + _lookup_path_token token = _next_token(r, *parent); + if( ! token) + return NONE; + + id_type node = NONE; + if(token.type == MAP || token.type == SEQ) + { + _RYML_CB_ASSERT(m_callbacks, !token.value.begins_with('[')); + //_RYML_CB_ASSERT(m_callbacks, is_container(r->closest) || r->closest == NONE); + if( ! 
is_container(r->closest)) + { + if(has_key(r->closest)) + to_map(r->closest, key(r->closest)); + else + to_map(r->closest); + } + else + { + if(is_map(r->closest)) + node = find_child(r->closest, token.value); + else + { + id_type pos = NONE; + _RYML_CB_CHECK(m_callbacks, c4::atox(token.value, &pos)); + _RYML_CB_ASSERT(m_callbacks, pos != NONE); + node = child(r->closest, pos); + } + } + if(node == NONE) + { + _RYML_CB_ASSERT(m_callbacks, is_map(r->closest)); + node = append_child(r->closest); + NodeData *n = _p(node); + n->m_key.scalar = token.value; + n->m_type.add(KEY); + } + } + else if(token.type == KEYVAL) + { + _RYML_CB_ASSERT(m_callbacks, r->unresolved().empty()); + if(is_map(r->closest)) + { + node = find_child(r->closest, token.value); + if(node == NONE) + node = append_child(r->closest); + } + else + { + _RYML_CB_ASSERT(m_callbacks, !is_seq(r->closest)); + _add_flags(r->closest, MAP); + node = append_child(r->closest); + } + NodeData *n = _p(node); + n->m_key.scalar = token.value; + n->m_val.scalar = ""; + n->m_type.add(KEYVAL); + } + else if(token.type == KEY) + { + _RYML_CB_ASSERT(m_callbacks, token.value.begins_with('[') && token.value.ends_with(']')); + token.value = token.value.offs(1, 1).trim(' '); + id_type idx; + if( ! from_chars(token.value, &idx)) + return NONE; + if( ! 
is_container(r->closest)) + { + if(has_key(r->closest)) + { + csubstr k = key(r->closest); + _clear_type(r->closest); + to_seq(r->closest, k); + } + else + { + _clear_type(r->closest); + to_seq(r->closest); + } + } + _RYML_CB_ASSERT(m_callbacks, is_container(r->closest)); + node = child(r->closest, idx); + if(node == NONE) + { + _RYML_CB_ASSERT(m_callbacks, num_children(r->closest) <= idx); + for(id_type i = num_children(r->closest); i <= idx; ++i) + { + node = append_child(r->closest); + if(i < idx) + { + if(is_map(r->closest)) + to_keyval(node, /*"~"*/{}, /*"~"*/{}); + else if(is_seq(r->closest)) + to_val(node, /*"~"*/{}); + } + } + } + } + else + { + C4_NEVER_REACH(); + } + + _RYML_CB_ASSERT(m_callbacks, node != NONE); + *parent = token; + return node; +} + +/* types of tokens: + * - seeing "map." ---> "map"/MAP + * - finishing "scalar" ---> "scalar"/KEYVAL + * - seeing "seq[n]" ---> "seq"/SEQ (--> "[n]"/KEY) + * - seeing "[n]" ---> "[n]"/KEY + */ +Tree::_lookup_path_token Tree::_next_token(lookup_result *r, _lookup_path_token const& parent) const +{ + csubstr unres = r->unresolved(); + if(unres.empty()) + return {}; + + // is it an indexation like [0], [1], etc? + if(unres.begins_with('[')) + { + size_t pos = unres.find(']'); + if(pos == csubstr::npos) + return {}; + csubstr idx = unres.first(pos + 1); + _advance(r, pos + 1); + return {idx, KEY}; + } + + // no. so it must be a name + size_t pos = unres.first_of(".["); + if(pos == csubstr::npos) + { + _advance(r, unres.len); + NodeType t; + if(( ! parent) || parent.type.is_seq()) + return {unres, VAL}; + return {unres, KEYVAL}; + } + + // it's either a map or a seq + _RYML_CB_ASSERT(m_callbacks, unres[pos] == '.' 
|| unres[pos] == '['); + if(unres[pos] == '.') + { + _RYML_CB_ASSERT(m_callbacks, pos != 0); + _advance(r, pos + 1); + return {unres.first(pos), MAP}; + } + + _RYML_CB_ASSERT(m_callbacks, unres[pos] == '['); + _advance(r, pos); + return {unres.first(pos), SEQ}; +} + + +} // namespace ryml +} // namespace c4 + + +C4_SUPPRESS_WARNING_GCC_CLANG_POP +C4_SUPPRESS_WARNING_MSVC_POP + +#endif /* RYML_SINGLE_HDR_DEFINE_NOW */ + + +// (end https://github.com/biojppm/rapidyaml/src/c4/yml/tree.cpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/yml/parse_engine.def.hpp +// https://github.com/biojppm/rapidyaml/src/c4/yml/parse_engine.def.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4_YML_PARSE_ENGINE_DEF_HPP_ +#define _C4_YML_PARSE_ENGINE_DEF_HPP_ + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/parse_engine.hpp +//#include "c4/yml/parse_engine.hpp" +#if !defined(C4_YML_PARSE_ENGINE_HPP_) && !defined(_C4_YML_PARSE_ENGINE_HPP_) +#error "amalgamate: file c4/yml/parse_engine.hpp must have been included at this point" +#endif /* C4_YML_PARSE_ENGINE_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/error.hpp +//#include "c4/error.hpp" +#if !defined(C4_ERROR_HPP_) && !defined(_C4_ERROR_HPP_) +#error "amalgamate: file c4/error.hpp must have been included at this point" +#endif /* C4_ERROR_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/charconv.hpp +//#include "c4/charconv.hpp" +#if !defined(C4_CHARCONV_HPP_) && !defined(_C4_CHARCONV_HPP_) +#error "amalgamate: file c4/charconv.hpp must have been included at this point" +#endif /* C4_CHARCONV_HPP_ */ + +// amalgamate: removed include of 
+// https://github.com/biojppm/rapidyaml/src/c4/utf.hpp +//#include "c4/utf.hpp" +#if !defined(C4_UTF_HPP_) && !defined(_C4_UTF_HPP_) +#error "amalgamate: file c4/utf.hpp must have been included at this point" +#endif /* C4_UTF_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/dump.hpp +//#include +#if !defined(C4_DUMP_HPP_) && !defined(_C4_DUMP_HPP_) +#error "amalgamate: file c4/dump.hpp must have been included at this point" +#endif /* C4_DUMP_HPP_ */ + + +//included above: +//#include + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/detail/parser_dbg.hpp +//#include "c4/yml/detail/parser_dbg.hpp" +#if !defined(C4_YML_DETAIL_PARSER_DBG_HPP_) && !defined(_C4_YML_DETAIL_PARSER_DBG_HPP_) +#error "amalgamate: file c4/yml/detail/parser_dbg.hpp must have been included at this point" +#endif /* C4_YML_DETAIL_PARSER_DBG_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/filter_processor.hpp +//#include "c4/yml/filter_processor.hpp" +#if !defined(C4_YML_FILTER_PROCESSOR_HPP_) && !defined(_C4_YML_FILTER_PROCESSOR_HPP_) +#error "amalgamate: file c4/yml/filter_processor.hpp must have been included at this point" +#endif /* C4_YML_FILTER_PROCESSOR_HPP_ */ + +#ifdef RYML_DBG +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/detail/print.hpp +//#include "c4/yml/detail/print.hpp" +#if !defined(C4_YML_DETAIL_PRINT_HPP_) && !defined(_C4_YML_DETAIL_PRINT_HPP_) +#error "amalgamate: file c4/yml/detail/print.hpp must have been included at this point" +#endif /* C4_YML_DETAIL_PRINT_HPP_ */ + +#endif + + +#if defined(RYML_WITH_TAB_TOKENS) +#define _RYML_WITH_TAB_TOKENS(...) __VA_ARGS__ +#define _RYML_WITHOUT_TAB_TOKENS(...) +#define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) with +#else +#define _RYML_WITH_TAB_TOKENS(...) +#define _RYML_WITHOUT_TAB_TOKENS(...) 
__VA_ARGS__ +#define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) without +#endif - if(a.m_prev_sibling != ib && b.m_prev_sibling != ia && - a.m_next_sibling != ib && b.m_next_sibling != ia) - { - if(a.m_prev_sibling != NONE && a.m_prev_sibling != ib) - _p(a.m_prev_sibling)->m_next_sibling = ib; - if(a.m_next_sibling != NONE && a.m_next_sibling != ib) - _p(a.m_next_sibling)->m_prev_sibling = ib; - if(b.m_prev_sibling != NONE && b.m_prev_sibling != ia) - _p(b.m_prev_sibling)->m_next_sibling = ia; - if(b.m_next_sibling != NONE && b.m_next_sibling != ia) - _p(b.m_next_sibling)->m_prev_sibling = ia; - std::swap(a.m_prev_sibling, b.m_prev_sibling); - std::swap(a.m_next_sibling, b.m_next_sibling); - } - else + +// scaffold: +#define _c4dbgnextline() \ + do { \ + _c4dbgq("\n-----------"); \ + _c4dbgt("handling line={}, offset={}B", \ + m_evt_handler->m_curr->pos.line, \ + m_evt_handler->m_curr->pos.offset); \ + } while(0) + + +#if defined(_MSC_VER) +# pragma warning(push) +# pragma warning(disable: 4296/*expression is always 'boolean_value'*/) +# pragma warning(disable: 4702/*unreachable code*/) +#elif defined(__clang__) +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wtype-limits" // to remove a warning on an assertion that a size_t >= 0. Later on, this size_t will turn into a template argument, and then it can become < 0. +# pragma clang diagnostic ignored "-Wformat-nonliteral" +# pragma clang diagnostic ignored "-Wold-style-cast" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wtype-limits" // to remove a warning on an assertion that a size_t >= 0. Later on, this size_t will turn into a template argument, and then it can become < 0. 
+# pragma GCC diagnostic ignored "-Wformat-nonliteral" +# pragma GCC diagnostic ignored "-Wold-style-cast" +# if __GNUC__ >= 7 +# pragma GCC diagnostic ignored "-Wduplicated-branches" +# endif +#endif + +namespace c4 { +namespace yml { + +namespace { + +C4_HOT C4_ALWAYS_INLINE bool _is_blck_token(csubstr s) noexcept +{ + RYML_ASSERT(s.len > 0); + RYML_ASSERT(s.str[0] == '-' || s.str[0] == ':' || s.str[0] == '?'); + return ((s.len == 1) || ((s.str[1] == ' ') _RYML_WITH_TAB_TOKENS( || (s.str[1] == '\t')))); +} + +inline bool _is_doc_begin_token(csubstr s) +{ + RYML_ASSERT(s.begins_with('-')); + RYML_ASSERT(!s.ends_with("\n")); + RYML_ASSERT(!s.ends_with("\r")); + return (s.len >= 3 && s.str[1] == '-' && s.str[2] == '-') + && (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t'))); +} + +inline bool _is_doc_end_token(csubstr s) +{ + RYML_ASSERT(s.begins_with('.')); + RYML_ASSERT(!s.ends_with("\n")); + RYML_ASSERT(!s.ends_with("\r")); + return (s.len >= 3 && s.str[1] == '.' && s.str[2] == '.') + && (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t'))); +} + +inline bool _is_doc_token(csubstr s) noexcept +{ + // + // NOTE: this function was failing under some scenarios when + // compiled with gcc -O2 (but not -O3 or -O1 or -O0), likely + // related to optimizer assumptions on the input string and + // possibly caused from UB around assignment to that string (the + // call site was in _scan_block()). For more details see: + // + // https://github.com/biojppm/rapidyaml/issues/440 + // + // The current version does not suffer this problem, but it may + // appear again. 
+ // + if(s.len >= 3) { - if(a.m_next_sibling == ib) // n will go after m - { - _RYML_CB_ASSERT(m_callbacks, b.m_prev_sibling == ia); - if(a.m_prev_sibling != NONE) - { - _RYML_CB_ASSERT(m_callbacks, a.m_prev_sibling != ib); - _p(a.m_prev_sibling)->m_next_sibling = ib; - } - if(b.m_next_sibling != NONE) - { - _RYML_CB_ASSERT(m_callbacks, b.m_next_sibling != ia); - _p(b.m_next_sibling)->m_prev_sibling = ia; - } - size_t ns = b.m_next_sibling; - b.m_prev_sibling = a.m_prev_sibling; - b.m_next_sibling = ia; - a.m_prev_sibling = ib; - a.m_next_sibling = ns; - } - else if(a.m_prev_sibling == ib) // m will go after n - { - _RYML_CB_ASSERT(m_callbacks, b.m_next_sibling == ia); - if(b.m_prev_sibling != NONE) - { - _RYML_CB_ASSERT(m_callbacks, b.m_prev_sibling != ia); - _p(b.m_prev_sibling)->m_next_sibling = ia; - } - if(a.m_next_sibling != NONE) - { - _RYML_CB_ASSERT(m_callbacks, a.m_next_sibling != ib); - _p(a.m_next_sibling)->m_prev_sibling = ib; - } - size_t ns = b.m_prev_sibling; - a.m_prev_sibling = b.m_prev_sibling; - a.m_next_sibling = ib; - b.m_prev_sibling = ia; - b.m_next_sibling = ns; - } - else + switch(s.str[0]) { - C4_NEVER_REACH(); + case '-': + //return _is_doc_begin_token(s); // this was failing with gcc -O2 + return (s.str[1] == '-' && s.str[2] == '-') + && (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t'))); + case '.': + //return _is_doc_end_token(s); // this was failing with gcc -O2 + return (s.str[1] == '.' 
&& s.str[2] == '.') + && (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t'))); } } - _RYML_CB_ASSERT(m_callbacks, a.m_next_sibling != ia); - _RYML_CB_ASSERT(m_callbacks, a.m_prev_sibling != ia); - _RYML_CB_ASSERT(m_callbacks, b.m_next_sibling != ib); - _RYML_CB_ASSERT(m_callbacks, b.m_prev_sibling != ib); + return false; +} - if(a.m_parent != ib && b.m_parent != ia) - { - std::swap(a.m_parent, b.m_parent); - } - else +inline size_t _is_special_json_scalar(csubstr s) +{ + RYML_ASSERT(s.len); + switch(s.str[0]) { - if(a.m_parent == ib && b.m_parent != ia) - { - a.m_parent = b.m_parent; - b.m_parent = ia; - } - else if(a.m_parent != ib && b.m_parent == ia) - { - b.m_parent = a.m_parent; - a.m_parent = ib; - } - else - { - C4_NEVER_REACH(); - } + case 'f': + if(s.len >= 5 && s.begins_with("false")) + return 5u; + break; + case 't': + if(s.len >= 4 && s.begins_with("true")) + return 4u; + break; + case 'n': + if(s.len >= 4 && s.begins_with("null")) + return 4u; + break; } + return 0u; } + //----------------------------------------------------------------------------- -void Tree::_copy_hierarchy(size_t dst_, size_t src_) + +C4_ALWAYS_INLINE size_t _extend_from_combined_newline(char nl, char following) { - auto const& C4_RESTRICT src = *_p(src_); - auto & C4_RESTRICT dst = *_p(dst_); - auto & C4_RESTRICT prt = *_p(src.m_parent); - for(size_t i = src.m_first_child; i != NONE; i = next_sibling(i)) - { - _p(i)->m_parent = dst_; - } - if(src.m_prev_sibling != NONE) - { - _p(src.m_prev_sibling)->m_next_sibling = dst_; - } - if(src.m_next_sibling != NONE) + return (nl == '\n' && following == '\r') || (nl == '\r' && following == '\n'); +} + +//! 
look for the next newline chars, and jump to the right of those +inline substr from_next_line(substr rem) +{ + size_t nlpos = rem.first_of("\r\n"); + if(nlpos == csubstr::npos) + return {}; + const char nl = rem[nlpos]; + rem = rem.right_of(nlpos); + if(rem.empty()) + return {}; + if(_extend_from_combined_newline(nl, rem.front())) + rem = rem.sub(1); + return rem; +} + + +//----------------------------------------------------------------------------- + +inline size_t _count_following_newlines(csubstr r, size_t *C4_RESTRICT i) +{ + RYML_ASSERT(r[*i] == '\n'); + size_t numnl_following = 0; + ++(*i); + for( ; *i < r.len; ++(*i)) { - _p(src.m_next_sibling)->m_prev_sibling = dst_; + if(r.str[*i] == '\n') + ++numnl_following; + // skip leading whitespace + else if(r.str[*i] == ' ' || r.str[*i] == '\t' || r.str[*i] == '\r') + ; + else + break; } - if(prt.m_first_child == src_) + return numnl_following; +} + +/** @p i is set to the first non whitespace character after the line + * @return the number of empty lines after the initial position */ +inline size_t _count_following_newlines(csubstr r, size_t *C4_RESTRICT i, size_t indentation) +{ + RYML_ASSERT(r[*i] == '\n'); + size_t numnl_following = 0; + ++(*i); + if(indentation == 0) { - prt.m_first_child = dst_; + for( ; *i < r.len; ++(*i)) + { + if(r.str[*i] == '\n') + ++numnl_following; + // skip leading whitespace + else if(r.str[*i] == ' ' || r.str[*i] == '\t' || r.str[*i] == '\r') + ; + else + break; + } } - if(prt.m_last_child == src_) + else { - prt.m_last_child = dst_; + for( ; *i < r.len; ++(*i)) + { + if(r.str[*i] == '\n') + { + ++numnl_following; + // skip the indentation after the newline + size_t stop = *i + indentation; + for( ; *i < r.len; ++(*i)) + { + if(r.str[*i] != ' ' && r.str[*i] != '\r') + break; + RYML_ASSERT(*i < stop); + } + C4_UNUSED(stop); + } + // skip leading whitespace + else if(r.str[*i] == ' ' || r.str[*i] == '\t' || r.str[*i] == '\r') + ; + else + break; + } } - dst.m_parent = src.m_parent; - 
dst.m_first_child = src.m_first_child; - dst.m_last_child = src.m_last_child; - dst.m_prev_sibling = src.m_prev_sibling; - dst.m_next_sibling = src.m_next_sibling; + return numnl_following; } +} // anon namespace + + +//----------------------------------------------------------------------------- //----------------------------------------------------------------------------- -void Tree::_swap_props(size_t n_, size_t m_) +//----------------------------------------------------------------------------- + +template +ParseEngine::~ParseEngine() { - NodeData &C4_RESTRICT n = *_p(n_); - NodeData &C4_RESTRICT m = *_p(m_); - std::swap(n.m_type, m.m_type); - std::swap(n.m_key, m.m_key); - std::swap(n.m_val, m.m_val); + _free(); + _clr(); +} + +template +ParseEngine::ParseEngine(EventHandler *evt_handler, ParserOptions opts) + : m_options(opts) + , m_file() + , m_buf() + , m_evt_handler(evt_handler) + , m_pending_anchors() + , m_pending_tags() + , m_newline_offsets() + , m_newline_offsets_size(0) + , m_newline_offsets_capacity(0) + , m_newline_offsets_buf() +{ + RYML_CHECK(evt_handler); +} + +template +ParseEngine::ParseEngine(ParseEngine &&that) + : m_options(that.m_options) + , m_file(that.m_file) + , m_buf(that.m_buf) + , m_evt_handler(that.m_evt_handler) + , m_pending_anchors(that.m_pending_anchors) + , m_pending_tags(that.m_pending_tags) + , m_newline_offsets(that.m_newline_offsets) + , m_newline_offsets_size(that.m_newline_offsets_size) + , m_newline_offsets_capacity(that.m_newline_offsets_capacity) + , m_newline_offsets_buf(that.m_newline_offsets_buf) +{ + that._clr(); +} + +template +ParseEngine::ParseEngine(ParseEngine const& that) + : m_options(that.m_options) + , m_file(that.m_file) + , m_buf(that.m_buf) + , m_evt_handler(that.m_evt_handler) + , m_pending_anchors(that.m_pending_anchors) + , m_pending_tags(that.m_pending_tags) + , m_newline_offsets() + , m_newline_offsets_size() + , m_newline_offsets_capacity() + , m_newline_offsets_buf() +{ + 
if(that.m_newline_offsets_capacity) + { + _resize_locations(that.m_newline_offsets_capacity); + _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity == that.m_newline_offsets_capacity); + memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size * sizeof(size_t)); + m_newline_offsets_size = that.m_newline_offsets_size; + } } -//----------------------------------------------------------------------------- -void Tree::move(size_t node, size_t after) +template +ParseEngine& ParseEngine::operator=(ParseEngine &&that) { - _RYML_CB_ASSERT(m_callbacks, node != NONE); - _RYML_CB_ASSERT(m_callbacks, node != after); - _RYML_CB_ASSERT(m_callbacks, ! is_root(node)); - _RYML_CB_ASSERT(m_callbacks, (after == NONE) || (has_sibling(node, after) && has_sibling(after, node))); - - _rem_hierarchy(node); - _set_hierarchy(node, parent(node), after); + _free(); + m_options = (that.m_options); + m_file = (that.m_file); + m_buf = (that.m_buf); + m_evt_handler = that.m_evt_handler; + m_pending_anchors = that.m_pending_anchors; + m_pending_tags = that.m_pending_tags; + m_newline_offsets = (that.m_newline_offsets); + m_newline_offsets_size = (that.m_newline_offsets_size); + m_newline_offsets_capacity = (that.m_newline_offsets_capacity); + m_newline_offsets_buf = (that.m_newline_offsets_buf); + that._clr(); + return *this; } -//----------------------------------------------------------------------------- - -void Tree::move(size_t node, size_t new_parent, size_t after) +template +ParseEngine& ParseEngine::operator=(ParseEngine const& that) { - _RYML_CB_ASSERT(m_callbacks, node != NONE); - _RYML_CB_ASSERT(m_callbacks, node != after); - _RYML_CB_ASSERT(m_callbacks, new_parent != NONE); - _RYML_CB_ASSERT(m_callbacks, new_parent != node); - _RYML_CB_ASSERT(m_callbacks, new_parent != after); - _RYML_CB_ASSERT(m_callbacks, ! 
is_root(node)); - - _rem_hierarchy(node); - _set_hierarchy(node, new_parent, after); + _free(); + m_options = (that.m_options); + m_file = (that.m_file); + m_buf = (that.m_buf); + m_evt_handler = that.m_evt_handler; + m_pending_anchors = that.m_pending_anchors; + m_pending_tags = that.m_pending_tags; + if(that.m_newline_offsets_capacity > m_newline_offsets_capacity) + _resize_locations(that.m_newline_offsets_capacity); + _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_capacity); + _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_size); + memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size * sizeof(size_t)); + m_newline_offsets_size = that.m_newline_offsets_size; + m_newline_offsets_buf = that.m_newline_offsets_buf; + return *this; } -size_t Tree::move(Tree *src, size_t node, size_t new_parent, size_t after) +template +void ParseEngine::_clr() { - _RYML_CB_ASSERT(m_callbacks, src != nullptr); - _RYML_CB_ASSERT(m_callbacks, node != NONE); - _RYML_CB_ASSERT(m_callbacks, new_parent != NONE); - _RYML_CB_ASSERT(m_callbacks, new_parent != after); - - size_t dup = duplicate(src, node, new_parent, after); - src->remove(node); - return dup; + m_options = {}; + m_file = {}; + m_buf = {}; + m_evt_handler = {}; + m_pending_anchors = {}; + m_pending_tags = {}; + m_newline_offsets = {}; + m_newline_offsets_size = {}; + m_newline_offsets_capacity = {}; + m_newline_offsets_buf = {}; } -void Tree::set_root_as_stream() +template +void ParseEngine::_free() { - size_t root = root_id(); - if(is_stream(root)) - return; - // don't use _add_flags() because it's checked and will fail - if(!has_children(root)) - { - if(is_val(root)) - { - _p(root)->m_type.add(SEQ); - size_t next_doc = append_child(root); - _copy_props_wo_key(next_doc, root); - _p(next_doc)->m_type.add(DOC); - _p(next_doc)->m_type.rem(SEQ); - } - _p(root)->m_type = STREAM; - return; - } - 
_RYML_CB_ASSERT(m_callbacks, !has_key(root)); - size_t next_doc = append_child(root); - _copy_props_wo_key(next_doc, root); - _add_flags(next_doc, DOC); - for(size_t prev = NONE, ch = first_child(root), next = next_sibling(ch); ch != NONE; ) + if(m_newline_offsets) { - if(ch == next_doc) - break; - move(ch, next_doc, prev); - prev = ch; - ch = next; - next = next_sibling(next); + _RYML_CB_FREE(m_evt_handler->m_stack.m_callbacks, m_newline_offsets, size_t, m_newline_offsets_capacity); + m_newline_offsets = nullptr; + m_newline_offsets_size = 0u; + m_newline_offsets_capacity = 0u; + m_newline_offsets_buf = 0u; } - _p(root)->m_type = STREAM; } //----------------------------------------------------------------------------- -void Tree::remove_children(size_t node) + +template +void ParseEngine::_reset() { - _RYML_CB_ASSERT(m_callbacks, get(node) != nullptr); - size_t ich = get(node)->m_first_child; - while(ich != NONE) + m_pending_anchors = {}; + m_pending_tags = {}; + if(m_options.locations()) { - remove_children(ich); - _RYML_CB_ASSERT(m_callbacks, get(ich) != nullptr); - size_t next = get(ich)->m_next_sibling; - _release(ich); - if(ich == get(node)->m_last_child) - break; - ich = next; + _prepare_locations(); } -} - -bool Tree::change_type(size_t node, NodeType type) -{ - _RYML_CB_ASSERT(m_callbacks, type.is_val() || type.is_map() || type.is_seq()); - _RYML_CB_ASSERT(m_callbacks, type.is_val() + type.is_map() + type.is_seq() == 1); - _RYML_CB_ASSERT(m_callbacks, type.has_key() == has_key(node) || (has_key(node) && !type.has_key())); - NodeData *d = _p(node); - if(type.is_map() && is_map(node)) - return false; - else if(type.is_seq() && is_seq(node)) - return false; - else if(type.is_val() && is_val(node)) - return false; - d->m_type = (d->m_type & (~(MAP|SEQ|VAL))) | type; - remove_children(node); - return true; + m_was_inside_qmrk = false; } //----------------------------------------------------------------------------- -size_t Tree::duplicate(size_t node, size_t 
parent, size_t after) + +template +void ParseEngine::_relocate_arena(csubstr prev_arena, substr next_arena) { - return duplicate(this, node, parent, after); + #define _ryml_relocate(s) \ + if(s.is_sub(prev_arena)) \ + { \ + s.str = next_arena.str + (s.str - prev_arena.str); \ + } + _ryml_relocate(m_buf); + _ryml_relocate(m_newline_offsets_buf); + for(size_t i = 0; i < m_pending_tags.num_entries; ++i) + _ryml_relocate(m_pending_tags.annotations[i].str); + for(size_t i = 0; i < m_pending_anchors.num_entries; ++i) + _ryml_relocate(m_pending_anchors.annotations[i].str); + #undef _ryml_relocate } -size_t Tree::duplicate(Tree const* src, size_t node, size_t parent, size_t after) +template +void ParseEngine::_s_relocate_arena(void* data, csubstr prev_arena, substr next_arena) { - _RYML_CB_ASSERT(m_callbacks, src != nullptr); - _RYML_CB_ASSERT(m_callbacks, node != NONE); - _RYML_CB_ASSERT(m_callbacks, parent != NONE); - _RYML_CB_ASSERT(m_callbacks, ! src->is_root(node)); - - size_t copy = _claim(); - - _copy_props(copy, src, node); - _set_hierarchy(copy, parent, after); - duplicate_children(src, node, copy, NONE); - - return copy; + ((ParseEngine*)data)->_relocate_arena(prev_arena, next_arena); } + //----------------------------------------------------------------------------- -size_t Tree::duplicate_children(size_t node, size_t parent, size_t after) -{ - return duplicate_children(this, node, parent, after); -} -size_t Tree::duplicate_children(Tree const* src, size_t node, size_t parent, size_t after) +template +template +void ParseEngine::_fmt_msg(DumpFn &&dumpfn) const { - _RYML_CB_ASSERT(m_callbacks, src != nullptr); - _RYML_CB_ASSERT(m_callbacks, node != NONE); - _RYML_CB_ASSERT(m_callbacks, parent != NONE); - _RYML_CB_ASSERT(m_callbacks, after == NONE || has_child(parent, after)); - - size_t prev = after; - for(size_t i = src->first_child(node); i != NONE; i = src->next_sibling(i)) + auto const *const C4_RESTRICT st = m_evt_handler->m_curr; + auto const& lc = 
st->line_contents; + csubstr contents = lc.stripped; + if(contents.len) { - prev = duplicate(src, i, parent, prev); + // print the yaml src line + size_t offs = 3u + to_chars(substr{}, st->pos.line) + to_chars(substr{}, st->pos.col); + if(m_file.len) + { + detail::_dump(dumpfn, "{}:", m_file); + offs += m_file.len + 1; + } + detail::_dump(dumpfn, "{}:{}: ", st->pos.line, st->pos.col); + csubstr maybe_full_content = (contents.len < 80u ? contents : contents.first(80u)); + csubstr maybe_ellipsis = (contents.len < 80u ? csubstr{} : csubstr("...")); + detail::_dump(dumpfn, "{}{} (size={})\n", maybe_full_content, maybe_ellipsis, contents.len); + // highlight the remaining portion of the previous line + size_t firstcol = (size_t)(lc.rem.begin() - lc.full.begin()); + size_t lastcol = firstcol + lc.rem.len; + for(size_t i = 0; i < offs + firstcol; ++i) + dumpfn(" "); + dumpfn("^"); + for(size_t i = 1, e = (lc.rem.len < 80u ? lc.rem.len : 80u); i < e; ++i) + dumpfn("~"); + detail::_dump(dumpfn, "{} (cols {}-{})\n", maybe_ellipsis, firstcol+1, lastcol+1); + } + else + { + dumpfn("\n"); } - return prev; +#ifdef RYML_DBG + // next line: print the state flags + { + char flagbuf_[128]; + detail::_dump(dumpfn, "top state: {}\n", detail::_parser_flags_to_str(flagbuf_, m_evt_handler->m_curr->flags)); + } +#endif } + //----------------------------------------------------------------------------- -void Tree::duplicate_contents(size_t node, size_t where) -{ - duplicate_contents(this, node, where); -} -void Tree::duplicate_contents(Tree const *src, size_t node, size_t where) +template +template +void ParseEngine::_err(csubstr fmt, Args const& C4_RESTRICT ...args) const { - _RYML_CB_ASSERT(m_callbacks, src != nullptr); - _RYML_CB_ASSERT(m_callbacks, node != NONE); - _RYML_CB_ASSERT(m_callbacks, where != NONE); - _copy_props_wo_key(where, src, node); - duplicate_children(src, node, where, last_child(where)); + char errmsg[RYML_ERRMSG_SIZE]; + detail::_SubstrWriter writer(errmsg); + auto 
dumpfn = [&writer](csubstr s){ writer.append(s); }; + detail::_dump(dumpfn, fmt, args...); + writer.append('\n'); + _fmt_msg(dumpfn); + size_t len = writer.pos < RYML_ERRMSG_SIZE ? writer.pos : RYML_ERRMSG_SIZE; + m_evt_handler->cancel_parse(); + m_evt_handler->m_stack.m_callbacks.m_error(errmsg, len, m_evt_handler->m_curr->pos, m_evt_handler->m_stack.m_callbacks.m_user_data); } -//----------------------------------------------------------------------------- -size_t Tree::duplicate_children_no_rep(size_t node, size_t parent, size_t after) -{ - return duplicate_children_no_rep(this, node, parent, after); -} -size_t Tree::duplicate_children_no_rep(Tree const *src, size_t node, size_t parent, size_t after) +//----------------------------------------------------------------------------- +#ifdef RYML_DBG +template +template +void ParseEngine::_dbg(csubstr fmt, Args const& C4_RESTRICT ...args) const { - _RYML_CB_ASSERT(m_callbacks, node != NONE); - _RYML_CB_ASSERT(m_callbacks, parent != NONE); - _RYML_CB_ASSERT(m_callbacks, after == NONE || has_child(parent, after)); - - // don't loop using pointers as there may be a relocation - - // find the position where "after" is - size_t after_pos = NONE; - if(after != NONE) + if(_dbg_enabled()) { - for(size_t i = first_child(parent), icount = 0; i != NONE; ++icount, i = next_sibling(i)) - { - if(i == after) - { - after_pos = icount; - break; - } - } - _RYML_CB_ASSERT(m_callbacks, after_pos != NONE); + auto dumpfn = [](csubstr s){ if(s.str) fwrite(s.str, 1, s.len, stdout); }; + detail::_dump(dumpfn, fmt, args...); + dumpfn("\n"); + _fmt_msg(dumpfn); } +} +#endif - // for each child to be duplicated... 
- size_t prev = after; - for(size_t i = src->first_child(node), icount = 0; i != NONE; ++icount, i = src->next_sibling(i)) + +//----------------------------------------------------------------------------- +template +bool ParseEngine::_finished_file() const +{ + bool ret = m_evt_handler->m_curr->pos.offset >= m_buf.len; + if(ret) { - if(is_seq(parent)) - { - prev = duplicate(i, parent, prev); - } - else - { - _RYML_CB_ASSERT(m_callbacks, is_map(parent)); - // does the parent already have a node with key equal to that of the current duplicate? - size_t rep = NONE, rep_pos = NONE; - for(size_t j = first_child(parent), jcount = 0; j != NONE; ++jcount, j = next_sibling(j)) - { - if(key(j) == key(i)) - { - rep = j; - rep_pos = jcount; - break; - } - } - if(rep == NONE) // there is no repetition; just duplicate - { - prev = duplicate(src, i, parent, prev); - } - else // yes, there is a repetition - { - if(after_pos != NONE && rep_pos < after_pos) - { - // rep is located before the node which will be inserted, - // and will be overridden by the duplicate. So replace it. - remove(rep); - prev = duplicate(src, i, parent, prev); - } - else if(prev == NONE) - { - // first iteration with prev = after = NONE and repetition - prev = rep; - } - else if(rep != prev) - { - // rep is located after the node which will be inserted - // and overrides it. So move the rep into this node's place. 
- move(rep, prev); - prev = rep; - } - } // there's a repetition - } + _c4dbgp("finished file!!!"); } + return ret; +} - return prev; +template +C4_HOT C4_ALWAYS_INLINE bool ParseEngine::_finished_line() const +{ + return m_evt_handler->m_curr->line_contents.rem.empty(); } //----------------------------------------------------------------------------- -void Tree::merge_with(Tree const *src, size_t src_node, size_t dst_node) +template +void ParseEngine::_maybe_skip_whitespace_tokens() { - _RYML_CB_ASSERT(m_callbacks, src != nullptr); - if(src_node == NONE) - src_node = src->root_id(); - if(dst_node == NONE) - dst_node = root_id(); - _RYML_CB_ASSERT(m_callbacks, src->has_val(src_node) || src->is_seq(src_node) || src->is_map(src_node)); + csubstr rem = m_evt_handler->m_curr->line_contents.rem; + if(rem.len && (rem.str[0] == ' ' _RYML_WITH_TAB_TOKENS(|| rem.str[0] == '\t'))) + { + size_t pos = rem.first_not_of(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' ')); + if(pos == npos) + pos = rem.len; // maybe the line is just all whitespace + _c4dbgpf("skip {} whitespace characters", pos); + _line_progressed(pos); + } +} - if(src->has_val(src_node)) +template +void ParseEngine::_maybe_skipchars(char c) +{ + csubstr rem = m_evt_handler->m_curr->line_contents.rem; + if(rem.len && rem.str[0] == c) { - if( ! 
has_val(dst_node)) - { - if(has_children(dst_node)) - remove_children(dst_node); - } - if(src->is_keyval(src_node)) - _copy_props(dst_node, src, src_node); - else if(src->is_val(src_node)) - _copy_props_wo_key(dst_node, src, src_node); - else - C4_NEVER_REACH(); + size_t pos = rem.first_not_of(c); + if(pos == npos) + pos = rem.len; // maybe the line is just all c + _c4dbgpf("skip {}x'{}'", pos, c); + _line_progressed(pos); } - else if(src->is_seq(src_node)) +} + +#ifdef RYML_NO_COVERAGE__TO_BE_DELETED +template +void ParseEngine::_maybe_skipchars_up_to(char c, size_t max_to_skip) +{ + csubstr rem = m_evt_handler->m_curr->line_contents.rem; + if(rem.len && rem.str[0] == c) { - if( ! is_seq(dst_node)) + size_t pos = rem.first_not_of(c); + if(pos == npos) + pos = rem.len; // maybe the line is just all c + if(pos > max_to_skip) + pos = max_to_skip; + _c4dbgpf("skip {}x'{}'", pos, c); + _line_progressed(pos); + } +} +#endif + +template +template +void ParseEngine::_skipchars(const char (&chars)[N]) +{ + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.begins_with_any(chars)); + size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(chars); + if(pos == npos) + pos = m_evt_handler->m_curr->line_contents.rem.len; // maybe the line is just whitespace + _c4dbgpf("skip {} characters", pos); + _line_progressed(pos); +} + +template +void ParseEngine::_skip_comment() +{ + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.begins_with('#')); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.is_sub(m_evt_handler->m_curr->line_contents.full)); + csubstr rem = m_evt_handler->m_curr->line_contents.rem; + csubstr full = m_evt_handler->m_curr->line_contents.full; + // raise an error if the comment is not preceded by whitespace + if(!full.begins_with('#')) + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.str > full.str); + const char 
c = full[(size_t)(rem.str - full.str - 1)]; + if(C4_UNLIKELY(c != ' ' && c != '\t')) + _RYML_CB_ERR(m_evt_handler->m_stack.m_callbacks, "comment not preceded by whitespace"); + } + else + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.str == full.str); + } + _c4dbgpf("comment was '{}'", rem); + _line_progressed(rem.len); +} + +template +void ParseEngine::_maybe_skip_comment() +{ + csubstr s = m_evt_handler->m_curr->line_contents.rem.triml(' '); + if(s.begins_with('#')) + { + _line_progressed((size_t)(s.str - m_evt_handler->m_curr->line_contents.rem.str)); + _skip_comment(); + } +} + +template +bool ParseEngine::_maybe_scan_following_colon() noexcept +{ + if(m_evt_handler->m_curr->line_contents.rem.len) + { + if(m_evt_handler->m_curr->line_contents.rem.str[0] == ' ' || m_evt_handler->m_curr->line_contents.rem.str[0] == '\t') { - if(has_children(dst_node)) - remove_children(dst_node); - _clear_type(dst_node); - if(src->has_key(src_node)) - to_seq(dst_node, src->key(src_node)); - else - to_seq(dst_node); + size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(" \t"); + if(pos == npos) + pos = m_evt_handler->m_curr->line_contents.rem.len; // maybe the line has only spaces + _c4dbgpf("skip {}x'{}'", pos, ' '); + _line_progressed(pos); } - for(size_t sch = src->first_child(src_node); sch != NONE; sch = src->next_sibling(sch)) + if(m_evt_handler->m_curr->line_contents.rem.len && (m_evt_handler->m_curr->line_contents.rem.str[0] == ':')) { - size_t dch = append_child(dst_node); - _copy_props_wo_key(dch, src, sch); - merge_with(src, sch, dch); + _c4dbgp("found ':' colon next"); + _line_progressed(1); + return true; } } - else if(src->is_map(src_node)) + return false; +} + +template +bool ParseEngine::_maybe_scan_following_comma() noexcept +{ + if(m_evt_handler->m_curr->line_contents.rem.len) { - if( ! 
is_map(dst_node)) + if(m_evt_handler->m_curr->line_contents.rem.str[0] == ' ' || m_evt_handler->m_curr->line_contents.rem.str[0] == '\t') { - if(has_children(dst_node)) - remove_children(dst_node); - _clear_type(dst_node); - if(src->has_key(src_node)) - to_map(dst_node, src->key(src_node)); - else - to_map(dst_node); + size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(" \t"); + if(pos == npos) + pos = m_evt_handler->m_curr->line_contents.rem.len; // maybe the line has only spaces + _c4dbgpf("skip {}x'{}'", pos, ' '); + _line_progressed(pos); } - for(size_t sch = src->first_child(src_node); sch != NONE; sch = src->next_sibling(sch)) + if(m_evt_handler->m_curr->line_contents.rem.len && (m_evt_handler->m_curr->line_contents.rem.str[0] == ',')) { - size_t dch = find_child(dst_node, src->key(sch)); - if(dch == NONE) - { - dch = append_child(dst_node); - _copy_props(dch, src, sch); - } - merge_with(src, sch, dch); + _c4dbgp("found ',' comma next"); + _line_progressed(1); + return true; } } - else - { - C4_NEVER_REACH(); - } + return false; } //----------------------------------------------------------------------------- -namespace detail { -/** @todo make this part of the public API, refactoring as appropriate - * to be able to use the same resolver to handle multiple trees (one - * at a time) */ -struct ReferenceResolver +template +csubstr ParseEngine::_scan_anchor() { - struct refdata - { - NodeType type; - size_t node; - size_t prev_anchor; - size_t target; - size_t parent_ref; - size_t parent_ref_sibling; - }; + csubstr s = m_evt_handler->m_curr->line_contents.rem; + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with('&')); + csubstr anchor = s.range(1, s.first_of(' ')); + _line_progressed(1u + anchor.len); + _maybe_skipchars(' '); + return anchor; +} - Tree *t; - /** from the specs: "an alias node refers to the most recent - * node in the serialization having the specified anchor". 
So - * we need to start looking upward from ref nodes. - * - * @see http://yaml.org/spec/1.2/spec.html#id2765878 */ - stack refs; +template +csubstr ParseEngine::_scan_ref_seq() +{ + csubstr s = m_evt_handler->m_curr->line_contents.rem; + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with('*')); + csubstr ref = s.first(s.first_of(",] :")); + _line_progressed(ref.len); + return ref; +} + +template +csubstr ParseEngine::_scan_ref_map() +{ + csubstr s = m_evt_handler->m_curr->line_contents.rem; + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with('*')); + csubstr ref = s.first(s.first_of(",} ")); + _line_progressed(ref.len); + return ref; +} - ReferenceResolver(Tree *t_) : t(t_), refs(t_->callbacks()) +template +csubstr ParseEngine::_scan_tag() +{ + csubstr rem = m_evt_handler->m_curr->line_contents.rem.triml(' '); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.begins_with('!')); + csubstr t; + if(rem.begins_with("!!")) { - resolve(); + _c4dbgp("begins with '!!'"); + if(has_any(FLOW)) + t = rem.left_of(rem.first_of(" ,")); + else + t = rem.left_of(rem.first_of(' ')); } - - void store_anchors_and_refs() + else if(rem.begins_with("!<")) { - // minimize (re-)allocations by counting first - size_t num_anchors_and_refs = count_anchors_and_refs(t->root_id()); - if(!num_anchors_and_refs) - return; - refs.reserve(num_anchors_and_refs); - - // now descend through the hierarchy - _store_anchors_and_refs(t->root_id()); - - // finally connect the reference list - size_t prev_anchor = npos; - size_t count = 0; - for(auto &rd : refs) - { - rd.prev_anchor = prev_anchor; - if(rd.type.is_anchor()) - prev_anchor = count; - ++count; - } + _c4dbgp("begins with '!<'"); + t = rem.left_of(rem.first_of('>'), true); } - - size_t count_anchors_and_refs(size_t n) + #ifdef RYML_NO_COVERAGE__TO_BE_DELETED + else if(rem.begins_with("!h!")) { - size_t c = 0; - c += t->has_key_anchor(n); - c += t->has_val_anchor(n); - c += t->is_key_ref(n); - c += 
t->is_val_ref(n); - for(size_t ch = t->first_child(n); ch != NONE; ch = t->next_sibling(ch)) - c += count_anchors_and_refs(ch); - return c; + _c4dbgp("begins with '!h!'"); + t = rem.left_of(rem.first_of(' ')); + } + #endif + else + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.begins_with('!')); + _c4dbgp("begins with '!'"); + if(has_any(FLOW)) + t = rem.left_of(rem.first_of(" ,")); + else + t = rem.left_of(rem.first_of(' ')); } + _line_progressed(t.len); + _maybe_skip_whitespace_tokens(); + return t; +} + + +//----------------------------------------------------------------------------- + +template +bool ParseEngine::_is_valid_start_scalar_plain_flow(csubstr s) +{ + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.empty()); - void _store_anchors_and_refs(size_t n) + // it's not a scalar if it starts with any of these characters: + switch(s.str[0]) { - if(t->is_key_ref(n) || t->is_val_ref(n) || (t->has_key(n) && t->key(n) == "<<")) + // these are all legal tokens which mean no scalar is starting: + case '[': + case ']': + case '{': + case '}': + case '!': + case '&': + case '*': + case '|': + case '>': + case '#': + _c4dbgpf("not a scalar: found non-scalar token '{}'", _c4prc(s.str[0])); + return false; + // '-' and ':' are illegal at the beginning if not followed by a scalar character + case '-': + case ':': + if(s.len > 1) { - if(t->is_seq(n)) + switch(s.str[1]) { - // for merging multiple inheritance targets - // <<: [ *CENTER, *BIG ] - for(size_t ich = t->first_child(n); ich != NONE; ich = t->next_sibling(ich)) + case '\n': + case '\r': + case '{': + case '[': + //_RYML_WITHOUT_TAB_TOKENS(case '\t'): + _c4err("invalid token \":{}\"", _c4prc(s.str[1])); + break; + case ' ': + case '}': + case ']': + if(s.str[0] == ':') { - RYML_ASSERT(t->num_children(ich) == 0); - refs.push({VALREF, ich, npos, npos, n, t->next_sibling(n)}); + _c4dbgpf("not a scalar: found non-scalar token '{}{}'", s.str[0], s.str[1]); + return false; } - return; - } - 
if(t->is_key_ref(n) && t->key(n) != "<<") // insert key refs BEFORE inserting val refs - { - RYML_CHECK((!t->has_key(n)) || t->key(n).ends_with(t->key_ref(n))); - refs.push({KEYREF, n, npos, npos, NONE, NONE}); - } - if(t->is_val_ref(n)) - { - RYML_CHECK((!t->has_val(n)) || t->val(n).ends_with(t->val_ref(n))); - refs.push({VALREF, n, npos, npos, NONE, NONE}); + break; + default: + break; } } - if(t->has_key_anchor(n)) + else { - RYML_CHECK(t->has_key(n)); - refs.push({KEYANCH, n, npos, npos, NONE, NONE}); + return false; } - if(t->has_val_anchor(n)) + break; + case '?': + if(s.len > 1) { - RYML_CHECK(t->has_val(n) || t->is_container(n)); - refs.push({VALANCH, n, npos, npos, NONE, NONE}); + switch(s.str[1]) + { + case ' ': + case '\n': + case '\r': + _RYML_WITHOUT_TAB_TOKENS(case '\t':) + _c4dbgpf("not a scalar: found non-scalar token '?{}'", _c4prc(s.str[1])); + return false; + case '{': + case '}': + case '[': + case ']': + _c4err("invalid token \"?{}\"", _c4prc(s.str[1])); + break; + default: + break; + } } - for(size_t ch = t->first_child(n); ch != NONE; ch = t->next_sibling(ch)) + else { - _store_anchors_and_refs(ch); + return false; } + break; + // everything else is a legal starting character + default: + break; } - size_t lookup_(refdata *C4_RESTRICT ra) + return true; +} + +template +bool ParseEngine::_scan_scalar_plain_seq_flow(ScannedScalar *C4_RESTRICT sc) +{ + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RMAP)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(BLCK)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ|RSEQIMAP)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(FLOW)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RVAL)); + + substr s = m_evt_handler->m_curr->line_contents.rem; + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' ')); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with('\n')); + + if(!s.len) + return false; + + 
if(!_is_valid_start_scalar_plain_flow(s)) + return false; + + _c4dbgp("scanning seqflow scalar..."); + + const size_t start_offset = m_evt_handler->m_curr->pos.offset; + bool needs_filter = false; + while(true) { - RYML_ASSERT(ra->type.is_key_ref() || ra->type.is_val_ref()); - RYML_ASSERT(ra->type.is_key_ref() != ra->type.is_val_ref()); - csubstr refname; - if(ra->type.is_val_ref()) + _c4dbgpf("scanning scalar: curr line=[{}]~~~{}~~~", s.len, s); + for(size_t i = 0; i < s.len; ++i) { - refname = t->val_ref(ra->node); + const char c = s.str[i]; + switch(c) + { + case ',': + _c4dbgpf("found terminating character at {}: '{}'", i, c); + _line_progressed(i); + if(m_evt_handler->m_curr->pos.offset + i > start_offset) + { + goto ended_scalar; + } + else + { + _c4dbgp("at the beginning. no scalar here."); + return false; + } + break; + case ']': + _c4dbgpf("found terminating character at {}: '{}'", i, c); + _line_progressed(i); + goto ended_scalar; + break; + case '#': + _c4dbgp("found suspicious '#'"); + if(!i || (s.str[i-1] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[i-1] == '\t'))) + { + _c4dbgpf("found terminating character at {}: '{}'", i, c); + _line_progressed(i); + goto ended_scalar; + } + break; + case ':': + _c4dbgp("found suspicious ':'"); + if(s.len > i+1) + { + const char next = s.str[i+1]; + _c4dbgpf("next char is '{}'", _c4prc(next)); + if(next == ' ' || next == ',' _RYML_WITH_TAB_TOKENS(|| next == '\t')) + { + _c4dbgp("map starting!"); + if(m_evt_handler->m_curr->pos.offset + i > start_offset) + { + _c4dbgp("scalar finished!"); + _line_progressed(i); + goto ended_scalar; + } + else + { + _c4dbgp("at the beginning. no scalar here."); + return false; + } + } + else + { + _c4dbgp("it's a scalar indeed."); + ++i; // skip the next char + } + } + else if(s.len == i+1) + { + _c4dbgp("':' at line end. 
map starting!"); + return false; + } + break; + case '[': + case '{': + case '}': + _line_progressed(i); + _c4err("invalid character: '{}'", c); // noreturn + default: + ; + } } - else + _line_progressed(s.len); + if(!_finished_file()) { - RYML_ASSERT(ra->type.is_key_ref()); - refname = t->key_ref(ra->node); + _c4dbgp("next line!"); + _line_ended(); + _scan_line(); } - while(ra->prev_anchor != npos) + else { - ra = &refs[ra->prev_anchor]; - if(t->has_anchor(ra->node, refname)) - return ra->node; + _c4dbgp("file finished!"); + goto ended_scalar; } - - #ifndef RYML_ERRMSG_SIZE - #define RYML_ERRMSG_SIZE 1024 - #endif - - char errmsg[RYML_ERRMSG_SIZE]; - snprintf(errmsg, RYML_ERRMSG_SIZE, "anchor does not exist: '%.*s'", - static_cast(refname.size()), refname.data()); - c4::yml::error(errmsg); - return NONE; + s = m_evt_handler->m_curr->line_contents.rem; + needs_filter = true; } - void resolve() - { - store_anchors_and_refs(); - if(refs.empty()) - return; +ended_scalar: - /* from the specs: "an alias node refers to the most recent - * node in the serialization having the specified anchor". So - * we need to start looking upward from ref nodes. - * - * @see http://yaml.org/spec/1.2/spec.html#id2765878 */ - for(size_t i = 0, e = refs.size(); i < e; ++i) - { - auto &C4_RESTRICT rd = refs.top(i); - if( ! 
rd.type.is_ref()) - continue; - rd.target = lookup_(&rd); - } - } + sc->scalar = m_buf.range(start_offset, m_evt_handler->m_curr->pos.offset).trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' ')); + sc->needs_filter = needs_filter; -}; // ReferenceResolver -} // namespace detail + _c4prscalar("scanned plain scalar", sc->scalar, /*keep_newlines*/true); -void Tree::resolve() + return true; +} + +template +bool ParseEngine::_scan_scalar_plain_map_flow(ScannedScalar *C4_RESTRICT sc) { - if(m_size == 0) - return; + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RSEQ) || has_any(RSEQIMAP)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(BLCK)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RMAP|RSEQIMAP)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(FLOW)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RVAL|QMRK)); - detail::ReferenceResolver rr(this); + substr s = m_evt_handler->m_curr->line_contents.rem; + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' ')); - // insert the resolved references - size_t prev_parent_ref = NONE; - size_t prev_parent_ref_after = NONE; - for(auto const& C4_RESTRICT rd : rr.refs) + if(!s.len) + return false; + + if(!_is_valid_start_scalar_plain_flow(s)) + return false; + + _c4dbgp("scanning scalar..."); + + const size_t start_offset = m_evt_handler->m_curr->pos.offset; + bool needs_filter = false; + while(true) { - if( ! 
rd.type.is_ref()) - continue; - if(rd.parent_ref != NONE) - { - _RYML_CB_ASSERT(m_callbacks, is_seq(rd.parent_ref)); - size_t after, p = parent(rd.parent_ref); - if(prev_parent_ref != rd.parent_ref) - { - after = rd.parent_ref;//prev_sibling(rd.parent_ref_sibling); - prev_parent_ref_after = after; - } - else - { - after = prev_parent_ref_after; - } - prev_parent_ref = rd.parent_ref; - prev_parent_ref_after = duplicate_children_no_rep(rd.target, p, after); - remove(rd.node); - } - else + for(size_t i = 0; i < s.len; ++i) { - if(has_key(rd.node) && is_key_ref(rd.node) && key(rd.node) == "<<") + const char c = s.str[i]; + switch(c) { - _RYML_CB_ASSERT(m_callbacks, is_keyval(rd.node)); - size_t p = parent(rd.node); - size_t after = prev_sibling(rd.node); - duplicate_children_no_rep(rd.target, p, after); - remove(rd.node); - } - else if(rd.type.is_key_ref()) - { - _RYML_CB_ASSERT(m_callbacks, is_key_ref(rd.node)); - _RYML_CB_ASSERT(m_callbacks, has_key_anchor(rd.target) || has_val_anchor(rd.target)); - if(has_val_anchor(rd.target) && val_anchor(rd.target) == key_ref(rd.node)) - { - _RYML_CB_CHECK(m_callbacks, !is_container(rd.target)); - _RYML_CB_CHECK(m_callbacks, has_val(rd.target)); - _p(rd.node)->m_key.scalar = val(rd.target); - _add_flags(rd.node, KEY); - } - else - { - _RYML_CB_CHECK(m_callbacks, key_anchor(rd.target) == key_ref(rd.node)); - _p(rd.node)->m_key.scalar = key(rd.target); - _add_flags(rd.node, VAL); - } - } - else - { - _RYML_CB_ASSERT(m_callbacks, rd.type.is_val_ref()); - if(has_key_anchor(rd.target) && key_anchor(rd.target) == val_ref(rd.node)) + case ',': + case '}': + _line_progressed(i); + _c4dbgpf("found terminating character: '{}'", c); + goto ended_scalar; + case ':': + if(s.len == i+1 || s.str[i+1] == ' ' || s.str[i+1] == ',' || s.str[i+1] == '}' _RYML_WITH_TAB_TOKENS(|| s.str[i+1] == '\t')) { - _RYML_CB_CHECK(m_callbacks, !is_container(rd.target)); - _RYML_CB_CHECK(m_callbacks, has_val(rd.target)); - _p(rd.node)->m_val.scalar = 
key(rd.target); - _add_flags(rd.node, VAL); + _line_progressed(i); + _c4dbgpf("found terminating character: '{}'", c); + goto ended_scalar; } + break; + case '{': + case '[': + _line_progressed(i); + _c4err("invalid character: '{}'", c); // noreturn + break; + case ']': + _line_progressed(i); + if(has_any(RSEQIMAP)) + goto ended_scalar; else + _c4err("invalid character: '{}'", c); // noreturn + break; + case '#': + if(!i || s.str[i-1] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[i-1] == '\t')) { - duplicate_contents(rd.target, rd.node); + _line_progressed(i); + _c4dbgpf("found terminating character: '{}'", c); + goto ended_scalar; } + break; + default: + ; } } + _c4dbgp("next line!"); + _line_progressed(s.len); + if(!_finished_file()) + { + _c4dbgp("next line!"); + _line_ended(); + _scan_line(); + } + else + { + _c4dbgp("file finished!"); + goto ended_scalar; + } + s = m_evt_handler->m_curr->line_contents.rem; + needs_filter = true; } - // clear anchors and refs - for(auto const& C4_RESTRICT ar : rr.refs) - { - rem_anchor_ref(ar.node); - if(ar.parent_ref != NONE) - if(type(ar.parent_ref) != NOTYPE) - remove(ar.parent_ref); - } +ended_scalar: -} + sc->scalar = m_buf.range(start_offset, m_evt_handler->m_curr->pos.offset).trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \n\t\r", " \n\r")); + sc->needs_filter = needs_filter; -//----------------------------------------------------------------------------- + _c4dbgpf("scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar); -size_t Tree::num_children(size_t node) const -{ - size_t count = 0; - for(size_t i = first_child(node); i != NONE; i = next_sibling(i)) - ++count; - return count; + return true; } -size_t Tree::child(size_t node, size_t pos) const +template +bool ParseEngine::_scan_scalar_seq_json(ScannedScalar *C4_RESTRICT sc) { - _RYML_CB_ASSERT(m_callbacks, node != NONE); - size_t count = 0; - for(size_t i = first_child(node); i != NONE; i = next_sibling(i)) - { - if(count++ == pos) - return i; - } - return NONE; -} + 
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RMAP)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(BLCK)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(FLOW)); -size_t Tree::child_pos(size_t node, size_t ch) const -{ - size_t count = 0; - for(size_t i = first_child(node); i != NONE; i = next_sibling(i)) - { - if(i == ch) - return count; - ++count; - } - return npos; -} + substr s = m_evt_handler->m_curr->line_contents.rem; + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' ')); -#if defined(__clang__) -# pragma clang diagnostic push -# pragma GCC diagnostic ignored "-Wnull-dereference" -#elif defined(__GNUC__) -# pragma GCC diagnostic push -# if __GNUC__ >= 6 -# pragma GCC diagnostic ignored "-Wnull-dereference" -# endif -#endif + if(!s.len) + return false; -size_t Tree::find_child(size_t node, csubstr const& name) const -{ - _RYML_CB_ASSERT(m_callbacks, node != NONE); - _RYML_CB_ASSERT(m_callbacks, is_map(node)); - if(get(node)->m_first_child == NONE) + _c4dbgp("scanning scalar..."); + + switch(s.str[0]) { - _RYML_CB_ASSERT(m_callbacks, _p(node)->m_last_child == NONE); - return NONE; + case ']': + case '{': + case ',': + _c4dbgp("not a scalar."); + return false; } - else + { - _RYML_CB_ASSERT(m_callbacks, _p(node)->m_last_child != NONE); + const size_t len = _is_special_json_scalar(s); + if(len) + { + sc->scalar = s.first(len); + sc->needs_filter = false; + _c4dbgpf("special json scalar: '{}'", sc->scalar); + _line_progressed(len); + return true; + } } - for(size_t i = first_child(node); i != NONE; i = next_sibling(i)) + + // must be a number + size_t i = 0; + for( ; i < s.len; ++i) { - if(_p(i)->m_key.scalar == name) + const char c = s.str[i]; + switch(c) { - return i; + case ',': + case ']': + case ' ': + case '\t': + _c4dbgpf("found terminating character: '{}'", c); + goto ended_scalar; + case '#': + if(!i || s.str[i-1] == 
' ') + { + _c4dbgpf("found terminating character: '{}'", c); + goto ended_scalar; + } + break; + default: + ; } } - return NONE; -} - -#if defined(__clang__) -# pragma clang diagnostic pop -#elif defined(__GNUC__) -# pragma GCC diagnostic pop -#endif +ended_scalar: -//----------------------------------------------------------------------------- + if(C4_LIKELY(i > 0)) + { + _line_progressed(i); + sc->scalar = s.first(i); + sc->needs_filter = false; + _c4dbgpf("scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar); + return true; + } -void Tree::to_val(size_t node, csubstr val, type_bits more_flags) -{ - _RYML_CB_ASSERT(m_callbacks, ! has_children(node)); - _RYML_CB_ASSERT(m_callbacks, parent(node) == NONE || ! parent_is_map(node)); - _set_flags(node, VAL|more_flags); - _p(node)->m_key.clear(); - _p(node)->m_val = val; + return false; } -void Tree::to_keyval(size_t node, csubstr key, csubstr val, type_bits more_flags) +template +bool ParseEngine::_scan_scalar_map_json(ScannedScalar *C4_RESTRICT sc) { - _RYML_CB_ASSERT(m_callbacks, ! has_children(node)); - _RYML_CB_ASSERT(m_callbacks, parent(node) == NONE || parent_is_map(node)); - _set_flags(node, KEYVAL|more_flags); - _p(node)->m_key = key; - _p(node)->m_val = val; -} + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RSEQ)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(BLCK)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RMAP)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(FLOW)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RVAL)); -void Tree::to_map(size_t node, type_bits more_flags) -{ - _RYML_CB_ASSERT(m_callbacks, ! has_children(node)); - _RYML_CB_ASSERT(m_callbacks, parent(node) == NONE || ! 
parent_is_map(node)); // parent must not have children with keys - _set_flags(node, MAP|more_flags); - _p(node)->m_key.clear(); - _p(node)->m_val.clear(); -} + substr s = m_evt_handler->m_curr->line_contents.rem; + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' ')); -void Tree::to_map(size_t node, csubstr key, type_bits more_flags) -{ - _RYML_CB_ASSERT(m_callbacks, ! has_children(node)); - _RYML_CB_ASSERT(m_callbacks, parent(node) == NONE || parent_is_map(node)); - _set_flags(node, KEY|MAP|more_flags); - _p(node)->m_key = key; - _p(node)->m_val.clear(); -} + if(!s.len) + return false; -void Tree::to_seq(size_t node, type_bits more_flags) -{ - _RYML_CB_ASSERT(m_callbacks, ! has_children(node)); - _RYML_CB_ASSERT(m_callbacks, parent(node) == NONE || parent_is_seq(node)); - _set_flags(node, SEQ|more_flags); - _p(node)->m_key.clear(); - _p(node)->m_val.clear(); -} + _c4dbgp("scanning scalar..."); -void Tree::to_seq(size_t node, csubstr key, type_bits more_flags) -{ - _RYML_CB_ASSERT(m_callbacks, ! has_children(node)); - _RYML_CB_ASSERT(m_callbacks, parent(node) == NONE || parent_is_map(node)); - _set_flags(node, KEY|SEQ|more_flags); - _p(node)->m_key = key; - _p(node)->m_val.clear(); -} + { + const size_t len = _is_special_json_scalar(s); + if(len) + { + sc->scalar = s.first(len); + sc->needs_filter = false; + _c4dbgpf("special json scalar: '{}'", sc->scalar); + _line_progressed(len); + return true; + } + } -void Tree::to_doc(size_t node, type_bits more_flags) -{ - _RYML_CB_ASSERT(m_callbacks, ! 
has_children(node)); - _set_flags(node, DOC|more_flags); - _p(node)->m_key.clear(); - _p(node)->m_val.clear(); -} + // must be a number + size_t i = 0; + for( ; i < s.len; ++i) + { + const char c = s.str[i]; + switch(c) + { + case ',': + case '}': + case ' ': + case '\t': + _c4dbgpf("found terminating character: '{}'", c); + goto ended_scalar; + case '#': + if(!i || s.str[i-1] == ' ') + { + _c4dbgpf("found terminating character: '{}'", c); + goto ended_scalar; + } + break; + default: + ; + } + } -void Tree::to_stream(size_t node, type_bits more_flags) -{ - _RYML_CB_ASSERT(m_callbacks, ! has_children(node)); - _set_flags(node, STREAM|more_flags); - _p(node)->m_key.clear(); - _p(node)->m_val.clear(); -} +ended_scalar: + if(C4_LIKELY(i > 0)) + { + _line_progressed(i); + sc->scalar = s.first(i); + sc->needs_filter = false; + _c4dbgpf("scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar); + return true; + } -//----------------------------------------------------------------------------- -size_t Tree::num_tag_directives() const -{ - // this assumes we have a very small number of tag directives - for(size_t i = 0; i < RYML_MAX_TAG_DIRECTIVES; ++i) - if(m_tag_directives[i].handle.empty()) - return i; - return RYML_MAX_TAG_DIRECTIVES; + return false; } -void Tree::clear_tag_directives() +template +bool ParseEngine::_is_doc_begin(csubstr s) { - for(TagDirective &td : m_tag_directives) - td = {}; + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s[0] == '-'); + return (m_evt_handler->m_curr->line_contents.indentation == 0u && _at_line_begin() && _is_doc_begin_token(s)); } -size_t Tree::add_tag_directive(TagDirective const& td) +template +bool ParseEngine::_is_doc_end(csubstr s) { - _RYML_CB_CHECK(m_callbacks, !td.handle.empty()); - _RYML_CB_CHECK(m_callbacks, !td.prefix.empty()); - _RYML_CB_ASSERT(m_callbacks, td.handle.begins_with('!')); - _RYML_CB_ASSERT(m_callbacks, td.handle.ends_with('!')); - // https://yaml.org/spec/1.2.2/#rule-ns-word-char - 
_RYML_CB_ASSERT(m_callbacks, td.handle == '!' || td.handle == "!!" || td.handle.trim('!').first_not_of("01234567890abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-") == npos); - size_t pos = num_tag_directives(); - _RYML_CB_CHECK(m_callbacks, pos < RYML_MAX_TAG_DIRECTIVES); - m_tag_directives[pos] = td; - return pos; + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s[0] == '.'); + return (m_evt_handler->m_curr->line_contents.indentation == 0u && _at_line_begin() && _is_doc_end_token(s)); } -size_t Tree::resolve_tag(substr output, csubstr tag, size_t node_id) const +template +bool ParseEngine::_scan_scalar_plain_blck(ScannedScalar *C4_RESTRICT sc, size_t indentation) { - // lookup from the end. We want to find the first directive that - // matches the tag and has a target node id leq than the given - // node_id. - for(size_t i = RYML_MAX_TAG_DIRECTIVES-1; i != (size_t)-1; --i) + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(FLOW)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RSEQIMAP)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(BLCK|RUNK|USTY)); + + substr s = m_evt_handler->m_curr->line_contents.rem; + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' ')); + + if(!s.len) + return false; + + switch(s.str[0]) { - auto const& td = m_tag_directives[i]; - if(td.handle.empty()) - continue; - if(tag.begins_with(td.handle) && td.next_node_id <= node_id) + case '-': + if(_is_blck_token(s)) + { + return false; + } + else if(_is_doc_begin(s)) + { + _c4dbgp("token is doc start"); + return false; + } + break; + case ':': + case '?': + if(_is_blck_token(s)) + return false; + break; + case '[': + case '{': + case '&': + case '*': + case '!': + _RYML_WITH_TAB_TOKENS(case '\t':) + return false; + case '.': + if(_is_doc_end(s)) + { + _c4dbgp("token is doc end"); + return false; + } + break; + } + + _c4dbgpf("plain scalar! 
indentation={}", indentation); + + const size_t start_offset = m_evt_handler->m_curr->pos.offset; + const size_t start_line = m_evt_handler->m_curr->pos.line; + + bool needs_filter = false; + while(true) + { + _c4dbgpf("plain scalar line: [{}]~~~{}~~~", s.len, s); + for(size_t i = 0; i < s.len; ++i) { - _RYML_CB_ASSERT(m_callbacks, tag.len >= td.handle.len); - csubstr rest = tag.sub(td.handle.len); - size_t len = 1u + td.prefix.len + rest.len + 1u; - size_t numpc = rest.count('%'); - if(numpc == 0) + const char curr = s.str[i]; + //_c4dbgpf("[{}]='{}'", i, _c4prc(curr)); + switch(curr) { - if(len <= output.len) + case ':': + _c4dbgpf("[{}]: got suspicious ':'", i); + // are there more characters? + if((i + 1 == s.len) || ((s.str[i+1] == ' ') _RYML_WITH_TAB_TOKENS( || (s.str[i+1] == '\t')))) + { + _c4dbgpf("followed by '{}'", i+1 == s.len ? csubstr("\\n") : _c4prc(s.str[i+1])); + _line_progressed(i); + // ': ' is accepted only on the first line + if(C4_LIKELY(m_evt_handler->m_curr->pos.line == start_line)) + { + _c4dbgp("start line. scalar ends here"); + goto ended_scalar; + } + else + { + _c4err("parse error"); + } + } + else + { + size_t j = i; + while(j + 1 < s.len && s.str[j+1] == ':') + { + _c4dbgp("skip colon"); + ++j; + } + i = j > i ? j-1 : i; + _c4dbgp("nothing to see here"); + } + break; + case '#': + _c4dbgp("got suspicious '#'"); + if(!i || (s.str[i-1] == ' ' || s.str[i-1] == '\t')) + { + _c4dbgp("comment! 
scalar ends here"); + _line_progressed(i); + goto ended_scalar; + } + else { - output.str[0] = '<'; - memcpy(1u + output.str, td.prefix.str, td.prefix.len); - memcpy(1u + output.str + td.prefix.len, rest.str, rest.len); - output.str[1u + td.prefix.len + rest.len] = '>'; + _c4dbgp("nothing to see here"); } + break; } - else + } + _line_progressed(s.len); + csubstr next_peeked = _peek_next_line(m_evt_handler->m_curr->pos.offset); + next_peeked = next_peeked.trimr("\n\r"); + const size_t next_indentation = next_peeked.first_not_of(' '); + _c4dbgpf("indentation curr={} next={}", indentation, next_indentation); + if(next_indentation < indentation) + { + _c4dbgp("smaller indentation! scalar ended"); + goto ended_scalar; + } + else if(next_indentation == 0 && next_peeked.len > 0) + { + const char first = next_peeked.str[0]; + switch(first) { - // need to decode URI % sequences - size_t pos = rest.find('%'); - _RYML_CB_ASSERT(m_callbacks, pos != npos); - do { - size_t next = rest.first_not_of("0123456789abcdefABCDEF", pos+1); - if(next == npos) - next = rest.len; - _RYML_CB_CHECK(m_callbacks, pos+1 < next); - _RYML_CB_CHECK(m_callbacks, pos+1 + 2 <= next); - size_t delta = next - (pos+1); - len -= delta; - pos = rest.find('%', pos+1); - } while(pos != npos); - if(len <= output.len) + case '-': + next_peeked = next_peeked.trimr("\n\r"); + _c4dbgpf("doc begin? peeked=[{}]~~~{}{}~~~", next_peeked.len, next_peeked.len >= 3 ? next_peeked.first(3) : next_peeked, next_peeked.len > 3 ? "..." : ""); + if(_is_doc_begin_token(next_peeked)) + { + _c4dbgp("doc begin! scalar ended"); + goto ended_scalar; + } + break; + case '.': + next_peeked = next_peeked.trimr("\n\r"); + _c4dbgpf("doc end? peeked=[{}]~~~{}{}~~~", next_peeked.len, next_peeked.len >= 3 ? next_peeked.first(3) : next_peeked, next_peeked.len > 3 ? "..." 
: ""); + if(_is_doc_end_token(next_peeked)) { - size_t prev = 0, wpos = 0; - auto appendstr = [&](csubstr s) { memcpy(output.str + wpos, s.str, s.len); wpos += s.len; }; - auto appendchar = [&](char c) { output.str[wpos++] = c; }; - appendchar('<'); - appendstr(td.prefix); - pos = rest.find('%'); - _RYML_CB_ASSERT(m_callbacks, pos != npos); - do { - size_t next = rest.first_not_of("0123456789abcdefABCDEF", pos+1); - if(next == npos) - next = rest.len; - _RYML_CB_CHECK(m_callbacks, pos+1 < next); - _RYML_CB_CHECK(m_callbacks, pos+1 + 2 <= next); - uint8_t val; - if(C4_UNLIKELY(!read_hex(rest.range(pos+1, next), &val) || val > 127)) - _RYML_CB_ERR(m_callbacks, "invalid URI character"); - appendstr(rest.range(prev, pos)); - appendchar((char)val); - prev = next; - pos = rest.find('%', pos+1); - } while(pos != npos); - _RYML_CB_ASSERT(m_callbacks, pos == npos); - _RYML_CB_ASSERT(m_callbacks, prev > 0); - _RYML_CB_ASSERT(m_callbacks, rest.len >= prev); - appendstr(rest.sub(prev)); - appendchar('>'); - _RYML_CB_ASSERT(m_callbacks, wpos == len); + _c4dbgp("doc end! 
scalar ended"); + goto ended_scalar; } + break; } - return len; } + // load with next line + _c4dbgp("next line!"); + if(!_finished_file()) + { + _c4dbgp("next line!"); + _line_ended(); + _scan_line(); + } + else + { + _c4dbgp("file finished!"); + goto ended_scalar; + } + s = m_evt_handler->m_curr->line_contents.rem; + needs_filter = true; } - return 0; // return 0 to signal that the tag is local and cannot be resolved -} -namespace { -csubstr _transform_tag(Tree *t, csubstr tag, size_t node) -{ - size_t required_size = t->resolve_tag(substr{}, tag, node); - if(!required_size) - return tag; - const char *prev_arena = t->arena().str; - substr buf = t->alloc_arena(required_size); - _RYML_CB_ASSERT(t->m_callbacks, t->arena().str == prev_arena); - size_t actual_size = t->resolve_tag(buf, tag, node); - _RYML_CB_ASSERT(t->m_callbacks, actual_size <= required_size); - return buf.first(actual_size); +ended_scalar: + + sc->scalar = m_buf.range(start_offset, m_evt_handler->m_curr->pos.offset).trimr(" \n\r\t"); + sc->needs_filter = needs_filter; + + _c4dbgpf("scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar); + + return true; } -void _resolve_tags(Tree *t, size_t node) + +template +bool ParseEngine::_scan_scalar_plain_seq_blck(ScannedScalar *C4_RESTRICT sc) { - for(size_t child = t->first_child(node); child != NONE; child = t->next_sibling(child)) - { - if(t->has_key(child) && t->has_key_tag(child)) - t->set_key_tag(child, _transform_tag(t, t->key_tag(child), child)); - if(t->has_val(child) && t->has_val_tag(child)) - t->set_val_tag(child, _transform_tag(t, t->val_tag(child), child)); - _resolve_tags(t, child); - } + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RMAP)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(FLOW)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RSEQIMAP)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(BLCK)); + 
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RVAL)); + return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref + 1u); } -size_t _count_resolved_tags_size(Tree const* t, size_t node) + +template +bool ParseEngine::_scan_scalar_plain_map_blck(ScannedScalar *C4_RESTRICT sc) { - size_t sz = 0; - for(size_t child = t->first_child(node); child != NONE; child = t->next_sibling(child)) - { - if(t->has_key(child) && t->has_key_tag(child)) - sz += t->resolve_tag(substr{}, t->key_tag(child), child); - if(t->has_val(child) && t->has_val_tag(child)) - sz += t->resolve_tag(substr{}, t->val_tag(child), child); - sz += _count_resolved_tags_size(t, child); - } - return sz; + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RSEQ)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(FLOW)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RMAP)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(BLCK)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RVAL|QMRK)); + return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref + 1u); } -} // namespace -void Tree::resolve_tags() +template +bool ParseEngine::_scan_scalar_plain_unk(ScannedScalar *C4_RESTRICT sc) { - if(empty()) - return; - if(num_tag_directives() == 0) - return; - size_t needed_size = _count_resolved_tags_size(this, root_id()); - if(needed_size) - reserve_arena(arena_size() + needed_size); - _resolve_tags(this, root_id()); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RUNK|USTY)); + return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref); } //----------------------------------------------------------------------------- -csubstr Tree::lookup_result::resolved() const +template +substr ParseEngine::_peek_next_line(size_t pos) const { - csubstr p = path.first(path_pos); - if(p.ends_with('.')) - p = p.first(p.len-1); - return p; + substr rem{}; // declare here because of the goto + size_t nlpos{}; // 
declare here because of the goto + pos = pos == npos ? m_evt_handler->m_curr->pos.offset : pos; + if(pos >= m_buf.len) + goto next_is_empty; + + // look for the next newline chars, and jump to the right of those + rem = from_next_line(m_buf.sub(pos)); + if(rem.empty()) + goto next_is_empty; + + // now get everything up to and including the following newline chars + nlpos = rem.first_of("\r\n"); + if((nlpos != csubstr::npos) && (nlpos + 1 < rem.len)) + nlpos += _extend_from_combined_newline(rem[nlpos], rem[nlpos+1]); + rem = rem.left_of(nlpos, /*include_pos*/true); + + _c4dbgpf("peek next line @ {}: (len={})'{}'", pos, rem.len, rem.trimr("\r\n")); + return rem; + +next_is_empty: + _c4dbgpf("peek next line @ {}: (len=0)''", pos); + return {}; } -csubstr Tree::lookup_result::unresolved() const +//----------------------------------------------------------------------------- + +template +void ParseEngine::_scan_line() { - return path.sub(path_pos); + if(C4_LIKELY(m_evt_handler->m_curr->pos.offset < m_buf.len)) + m_evt_handler->m_curr->line_contents.reset_with_next_line(m_buf, m_evt_handler->m_curr->pos.offset); + else + m_evt_handler->m_curr->line_contents.reset(m_buf.last(0), m_buf.last(0)); } -void Tree::_advance(lookup_result *r, size_t more) const +template +void ParseEngine::_line_progressed(size_t ahead) { - r->path_pos += more; - if(r->path.sub(r->path_pos).begins_with('.')) - ++r->path_pos; + _c4dbgpf("line[{}] ({} cols) progressed by {}: col {}-->{} offset {}-->{}", m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->line_contents.full.len, ahead, m_evt_handler->m_curr->pos.col, m_evt_handler->m_curr->pos.col+ahead, m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset+ahead); + m_evt_handler->m_curr->pos.offset += ahead; + m_evt_handler->m_curr->pos.col += ahead; + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col <= m_evt_handler->m_curr->line_contents.stripped.len+1); + 
m_evt_handler->m_curr->line_contents.rem = m_evt_handler->m_curr->line_contents.rem.sub(ahead); } -Tree::lookup_result Tree::lookup_path(csubstr path, size_t start) const +template +void ParseEngine::_line_ended() { - if(start == NONE) - start = root_id(); - lookup_result r(path, start); - if(path.empty()) - return r; - _lookup_path(&r); - if(r.target == NONE && r.closest == start) - r.closest = NONE; - return r; + _c4dbgpf("line[{}] ({} cols) ended! offset {}-->{} / col {}-->{}", + m_evt_handler->m_curr->pos.line, + m_evt_handler->m_curr->line_contents.full.len, + m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset + m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.stripped.len, + m_evt_handler->m_curr->pos.col, 1); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col == m_evt_handler->m_curr->line_contents.stripped.len + 1); + m_evt_handler->m_curr->pos.offset += m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.stripped.len; + ++m_evt_handler->m_curr->pos.line; + m_evt_handler->m_curr->pos.col = 1; } -size_t Tree::lookup_path_or_modify(csubstr default_value, csubstr path, size_t start) +template +void ParseEngine::_line_ended_undo() { - size_t target = _lookup_path_or_create(path, start); - if(parent_is_map(target)) - to_keyval(target, key(target), default_value); - else - to_val(target, default_value); - return target; + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col == 1u); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.line > 0u); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.offset >= m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.stripped.len); + const size_t delta = m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.stripped.len; + _c4dbgpf("line[{}] undo ended! 
line {}-->{}, offset {}-->{}", m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->pos.line - 1, m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset - delta); + m_evt_handler->m_curr->pos.offset -= delta; + --m_evt_handler->m_curr->pos.line; + m_evt_handler->m_curr->pos.col = m_evt_handler->m_curr->line_contents.stripped.len + 1u; + // don't forget to undo also the changes to the remainder of the line + //_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.offset >= m_buf.len || m_buf[m_evt_handler->m_curr->pos.offset] == '\n' || m_buf[m_evt_handler->m_curr->pos.offset] == '\r'); + m_evt_handler->m_curr->line_contents.rem = m_buf.sub(m_evt_handler->m_curr->pos.offset, 0); } -size_t Tree::lookup_path_or_modify(Tree const *src, size_t src_node, csubstr path, size_t start) + +//----------------------------------------------------------------------------- +template +void ParseEngine::_set_indentation(size_t indentation) { - size_t target = _lookup_path_or_create(path, start); - merge_with(src, src_node, target); - return target; + m_evt_handler->m_curr->indref = indentation; + _c4dbgpf("state[{}]: saving indentation: {}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref); } -size_t Tree::_lookup_path_or_create(csubstr path, size_t start) +template +void ParseEngine::_save_indentation() { - if(start == NONE) - start = root_id(); - lookup_result r(path, start); - _lookup_path(&r); - if(r.target != NONE) + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.begin() >= m_evt_handler->m_curr->line_contents.full.begin()); + m_evt_handler->m_curr->indref = m_evt_handler->m_curr->line_contents.current_col(); + _c4dbgpf("state[{}]: saving indentation: {}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref); +} + + +//----------------------------------------------------------------------------- + +template +void 
ParseEngine::_end_map_blck() +{ + _c4dbgp("mapblck: end"); + if(has_any(RKCL|RVAL)) { - C4_ASSERT(r.unresolved().empty()); - return r.target; + _c4dbgp("mapblck: set missing val"); + _handle_annotations_before_blck_val_scalar(); + m_evt_handler->set_val_scalar_plain({}); } - _lookup_path_modify(&r); - return r.target; + else if(has_any(QMRK)) + { + _c4dbgp("mapblck: set missing keyval"); + _handle_annotations_before_blck_key_scalar(); + m_evt_handler->set_key_scalar_plain({}); + _handle_annotations_before_blck_val_scalar(); + m_evt_handler->set_val_scalar_plain({}); + } + m_evt_handler->end_map(); } -void Tree::_lookup_path(lookup_result *r) const +template +void ParseEngine::_end_seq_blck() { - C4_ASSERT( ! r->unresolved().empty()); - _lookup_path_token parent{"", type(r->closest)}; - size_t node; - do + if(has_any(RVAL)) { - node = _next_node(r, &parent); - if(node != NONE) - r->closest = node; - if(r->unresolved().empty()) - { - r->target = node; - return; - } - } while(node != NONE); + _c4dbgp("seqblck: set missing val"); + _handle_annotations_before_blck_val_scalar(); + m_evt_handler->set_val_scalar_plain({}); + } + m_evt_handler->end_seq(); } -void Tree::_lookup_path_modify(lookup_result *r) +template +void ParseEngine::_end2_map() { - C4_ASSERT( ! 
r->unresolved().empty()); - _lookup_path_token parent{"", type(r->closest)}; - size_t node; - do + _c4dbgp("map: end"); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RMAP)); + if(has_any(BLCK)) { - node = _next_node_modify(r, &parent); - if(node != NONE) - r->closest = node; - if(r->unresolved().empty()) - { - r->target = node; - return; - } - } while(node != NONE); + _end_map_blck(); + } + else + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(FLOW)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(USTY)); + m_evt_handler->_pop(); + } } -size_t Tree::_next_node(lookup_result * r, _lookup_path_token *parent) const +template +void ParseEngine::_end2_seq() { - _lookup_path_token token = _next_token(r, *parent); - if( ! token) - return NONE; + _c4dbgp("seq: end"); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ)); + if(has_any(BLCK)) + { + _end_seq_blck(); + } + else + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(FLOW)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(USTY)); + m_evt_handler->_pop(); + } +} - size_t node = NONE; - csubstr prev = token.value; - if(token.type == MAP || token.type == SEQ) +template +void ParseEngine::_begin2_doc() +{ + m_doc_empty = true; + add_flags(RDOC); + m_evt_handler->begin_doc(); + m_evt_handler->m_curr->indref = 0; // ? +} + +template +void ParseEngine::_begin2_doc_expl() +{ + m_doc_empty = true; + add_flags(RDOC); + m_evt_handler->begin_doc_expl(); + m_evt_handler->m_curr->indref = 0; // ? 
+} + +template +void ParseEngine::_end2_doc() +{ + _c4dbgp("doc: end"); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RDOC)); + if(m_doc_empty) { - _RYML_CB_ASSERT(m_callbacks, !token.value.begins_with('[')); - //_RYML_CB_ASSERT(m_callbacks, is_container(r->closest) || r->closest == NONE); - _RYML_CB_ASSERT(m_callbacks, is_map(r->closest)); - node = find_child(r->closest, token.value); + _c4dbgp("doc was empty; add empty val"); + m_evt_handler->set_val_scalar_plain({}); } - else if(token.type == KEYVAL) + m_evt_handler->end_doc(); +} + +template +void ParseEngine::_end2_doc_expl() +{ + _c4dbgp("doc: end"); + if(m_doc_empty) { - _RYML_CB_ASSERT(m_callbacks, r->unresolved().empty()); - if(is_map(r->closest)) - node = find_child(r->closest, token.value); + _c4dbgp("doc: no children; add empty val"); + m_evt_handler->set_val_scalar_plain({}); } - else if(token.type == KEY) + m_evt_handler->end_doc_expl(); +} + +template +void ParseEngine::_maybe_begin_doc() +{ + if(has_none(RDOC)) { - _RYML_CB_ASSERT(m_callbacks, token.value.begins_with('[') && token.value.ends_with(']')); - token.value = token.value.offs(1, 1).trim(' '); - size_t idx = 0; - _RYML_CB_CHECK(m_callbacks, from_chars(token.value, &idx)); - node = child(r->closest, idx); + _c4dbgp("doc must be started"); + _begin2_doc(); } - else +} +template +void ParseEngine::_maybe_end_doc() +{ + if(has_any(RDOC)) { - C4_NEVER_REACH(); + _c4dbgp("doc must be finished"); + _end2_doc(); } +} - if(node != NONE) +template +void ParseEngine::_end_doc_suddenly__pop() +{ + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1); + if(m_evt_handler->m_stack[0].flags & RDOC) { - *parent = token; + _c4dbgp("root is RDOC"); + if(m_evt_handler->m_curr->level != 0) + _handle_indentation_pop(&m_evt_handler->m_stack[0]); + } + else if((m_evt_handler->m_stack.size() > 1) && (m_evt_handler->m_stack[1].flags & RDOC)) + { + _c4dbgp("root is STREAM"); + if(m_evt_handler->m_curr->level != 1) + 
_handle_indentation_pop(&m_evt_handler->m_stack[1]); } else { - csubstr p = r->path.sub(r->path_pos > 0 ? r->path_pos - 1 : r->path_pos); - r->path_pos -= prev.len; - if(p.begins_with('.')) - r->path_pos -= 1u; + _c4err("internal error"); } + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RDOC)); +} - return node; +template +void ParseEngine::_end_doc_suddenly() +{ + _c4dbgp("end doc suddenly"); + _end_doc_suddenly__pop(); + _end2_doc_expl(); + addrem_flags(RUNK|RTOP|NDOC, RMAP|RSEQ|RDOC); } -size_t Tree::_next_node_modify(lookup_result * r, _lookup_path_token *parent) +template +void ParseEngine::_start_doc_suddenly() { - _lookup_path_token token = _next_token(r, *parent); - if( ! token) - return NONE; + _c4dbgp("start doc suddenly"); + _end_doc_suddenly__pop(); + _end2_doc(); + _begin2_doc_expl(); +} - size_t node = NONE; - if(token.type == MAP || token.type == SEQ) - { - _RYML_CB_ASSERT(m_callbacks, !token.value.begins_with('[')); - //_RYML_CB_ASSERT(m_callbacks, is_container(r->closest) || r->closest == NONE); - if( ! 
is_container(r->closest)) - { - if(has_key(r->closest)) - to_map(r->closest, key(r->closest)); - else - to_map(r->closest); - } - else - { - if(is_map(r->closest)) - node = find_child(r->closest, token.value); - else - { - size_t pos = NONE; - _RYML_CB_CHECK(m_callbacks, c4::atox(token.value, &pos)); - _RYML_CB_ASSERT(m_callbacks, pos != NONE); - node = child(r->closest, pos); - } - } - if(node == NONE) - { - _RYML_CB_ASSERT(m_callbacks, is_map(r->closest)); - node = append_child(r->closest); - NodeData *n = _p(node); - n->m_key.scalar = token.value; - n->m_type.add(KEY); - } - } - else if(token.type == KEYVAL) +template +void ParseEngine::_end_stream() +{ + _c4dbgpf("end_stream, level={} node_id={}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->node_id); + if(has_all(RSEQ|FLOW)) + _c4err("missing terminating ]"); + else if(has_all(RMAP|FLOW)) + _c4err("missing terminating }"); + if(m_evt_handler->m_stack.size() > 1) + _handle_indentation_pop(m_evt_handler->m_stack.begin()); + if(has_all(RDOC)) { - _RYML_CB_ASSERT(m_callbacks, r->unresolved().empty()); - if(is_map(r->closest)) - { - node = find_child(r->closest, token.value); - if(node == NONE) - node = append_child(r->closest); - } - else - { - _RYML_CB_ASSERT(m_callbacks, !is_seq(r->closest)); - _add_flags(r->closest, MAP); - node = append_child(r->closest); - } - NodeData *n = _p(node); - n->m_key.scalar = token.value; - n->m_val.scalar = ""; - n->m_type.add(KEYVAL); + _end2_doc(); } - else if(token.type == KEY) - { - _RYML_CB_ASSERT(m_callbacks, token.value.begins_with('[') && token.value.ends_with(']')); - token.value = token.value.offs(1, 1).trim(' '); - size_t idx; - if( ! from_chars(token.value, &idx)) - return NONE; - if( ! 
is_container(r->closest)) - { - if(has_key(r->closest)) - { - csubstr k = key(r->closest); - _clear_type(r->closest); - to_seq(r->closest, k); - } - else - { - _clear_type(r->closest); - to_seq(r->closest); - } - } - _RYML_CB_ASSERT(m_callbacks, is_container(r->closest)); - node = child(r->closest, idx); - if(node == NONE) + else if(has_all(RTOP|RUNK)) + { + if(m_pending_anchors.num_entries || m_pending_tags.num_entries) { - _RYML_CB_ASSERT(m_callbacks, num_children(r->closest) <= idx); - for(size_t i = num_children(r->closest); i <= idx; ++i) + if(m_doc_empty) { - node = append_child(r->closest); - if(i < idx) - { - if(is_map(r->closest)) - to_keyval(node, /*"~"*/{}, /*"~"*/{}); - else if(is_seq(r->closest)) - to_val(node, /*"~"*/{}); - } + m_evt_handler->begin_doc(); + _handle_annotations_before_blck_val_scalar(); + m_evt_handler->set_val_scalar_plain({}); + m_evt_handler->end_doc(); } } } - else + m_evt_handler->end_stream(); +} + + +template +void ParseEngine::_handle_indentation_pop(ParserState const* popto) +{ + _c4dbgpf("popping {} level{}: from level {}(@ind={}) to level {}(@ind={})", m_evt_handler->m_curr->level - popto->level, (((m_evt_handler->m_curr->level - popto->level) > 1) ? 
"s" : ""), m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, popto->level, popto->indref); + while(m_evt_handler->m_curr != popto) { - C4_NEVER_REACH(); + if(has_any(RSEQ)) + { + _c4dbgpf("popping seq at level {} (indentation={},addr={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, m_evt_handler->m_curr); + _end2_seq(); + } + else if(has_any(RMAP)) + { + _c4dbgpf("popping map at level {} (indentation={},addr={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, m_evt_handler->m_curr); + _end2_map(); + } + else + { + break; + } } - - _RYML_CB_ASSERT(m_callbacks, node != NONE); - *parent = token; - return node; + _c4dbgpf("current level is {} (indentation={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref); } -/** types of tokens: - * - seeing "map." ---> "map"/MAP - * - finishing "scalar" ---> "scalar"/KEYVAL - * - seeing "seq[n]" ---> "seq"/SEQ (--> "[n]"/KEY) - * - seeing "[n]" ---> "[n]"/KEY - */ -Tree::_lookup_path_token Tree::_next_token(lookup_result *r, _lookup_path_token const& parent) const +template +void ParseEngine::_handle_indentation_pop_from_block_seq() { - csubstr unres = r->unresolved(); - if(unres.empty()) - return {}; - - // is it an indexation like [0], [1], etc? 
- if(unres.begins_with('[')) + // search the stack frame to jump to based on its indentation + using state_type = typename EventHandler::state; + state_type const* popto = nullptr; + auto &stack = m_evt_handler->m_stack; + _RYML_CB_ASSERT(stack.m_callbacks, stack.is_contiguous()); // this search relies on the stack being contiguous + _RYML_CB_ASSERT(stack.m_callbacks, m_evt_handler->m_curr >= stack.begin() && m_evt_handler->m_curr < stack.end()); + const size_t ind = m_evt_handler->m_curr->line_contents.indentation; + #ifdef RYML_DBG + if(_dbg_enabled()) { - size_t pos = unres.find(']'); - if(pos == csubstr::npos) - return {}; - csubstr idx = unres.first(pos + 1); - _advance(r, pos + 1); - return {idx, KEY}; + char flagbuf_[128]; + for(state_type const& s : stack) + _dbg_printf("state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(flagbuf_, s.flags)); } - - // no. so it must be a name - size_t pos = unres.first_of(".["); - if(pos == csubstr::npos) + #endif + for(state_type const* s = m_evt_handler->m_curr-1; s >= stack.begin(); --s) { - _advance(r, unres.len); - NodeType t; - if(( ! parent) || parent.type.is_seq()) - return {unres, VAL}; - return {unres, KEYVAL}; + _c4dbgpf("searching for state with indentation {}. curr={} (level={},node={})", ind, s->indref, s->level, s->node_id); + if(s->indref == ind) + { + _c4dbgpf("gotit!!! level={} node={}", s->level, s->node_id); + popto = s; + break; + } } - - // it's either a map or a seq - _RYML_CB_ASSERT(m_callbacks, unres[pos] == '.' 
|| unres[pos] == '['); - if(unres[pos] == '.') + if(!popto || popto >= m_evt_handler->m_curr || popto->level >= m_evt_handler->m_curr->level) { - _RYML_CB_ASSERT(m_callbacks, pos != 0); - _advance(r, pos + 1); - return {unres.first(pos), MAP}; + _c4err("parse error: incorrect indentation?"); } - - _RYML_CB_ASSERT(m_callbacks, unres[pos] == '['); - _advance(r, pos); - return {unres.first(pos), SEQ}; + _handle_indentation_pop(popto); } +template +void ParseEngine::_handle_indentation_pop_from_block_map() +{ + // search the stack frame to jump to based on its indentation + using state_type = typename EventHandler::state; + auto &stack = m_evt_handler->m_stack; + _RYML_CB_ASSERT(stack.m_callbacks, stack.is_contiguous()); // this search relies on the stack being contiguous + _RYML_CB_ASSERT(stack.m_callbacks, m_evt_handler->m_curr >= stack.begin() && m_evt_handler->m_curr < stack.end()); + const size_t ind = m_evt_handler->m_curr->line_contents.indentation; + state_type const* popto = nullptr; + #ifdef RYML_DBG + char flagbuf_[128]; + if(_dbg_enabled()) + { + for(state_type const& s : stack) + _dbg_printf("state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(flagbuf_, s.flags)); + } + #endif + for(state_type const* s = m_evt_handler->m_curr-1; s > stack.begin(); --s) // never go to the stack bottom. that's the root + { + _c4dbgpf("searching for state with indentation {}. current: ind={},level={},node={},flags={}", ind, s->indref, s->level, s->node_id, detail::_parser_flags_to_str(flagbuf_, s->flags)); + if(s->indref < ind) + { + break; + } + else if(s->indref == ind) + { + _c4dbgpf("same indentation!!! 
level={} node={}", s->level, s->node_id); + if(popto && has_any(RTOP, s) && has_none(RMAP|RSEQ, s)) + { + break; + } + popto = s; + if(has_all(RSEQ|BLCK, s)) + { + csubstr rem = m_evt_handler->m_curr->line_contents.rem; + const size_t first = rem.first_not_of(' '); + _RYML_CB_ASSERT(stack.m_callbacks, first == ind || first == npos); + rem = rem.right_of(first, true); + _c4dbgpf("indentless? rem='{}' first={}", rem, first); + if(rem.begins_with('-') && _is_blck_token(rem)) + { + _c4dbgp("parent was indentless seq"); + break; + } + } + } + } + if(!popto || popto >= m_evt_handler->m_curr || popto->level >= m_evt_handler->m_curr->level) + { + _c4err("parse error: incorrect indentation?"); + } + _handle_indentation_pop(popto); +} -} // namespace ryml -} // namespace c4 - - -C4_SUPPRESS_WARNING_GCC_POP -C4_SUPPRESS_WARNING_MSVC_POP - -#endif /* RYML_SINGLE_HDR_DEFINE_NOW */ - - -// (end https://github.com/biojppm/rapidyaml/src/c4/yml/tree.cpp) - - - -//******************************************************************************** -//-------------------------------------------------------------------------------- -// src/c4/yml/parse.cpp -// https://github.com/biojppm/rapidyaml/src/c4/yml/parse.cpp -//-------------------------------------------------------------------------------- -//******************************************************************************** - -#ifdef RYML_SINGLE_HDR_DEFINE_NOW -// amalgamate: removed include of -// https://github.com/biojppm/rapidyaml/src/c4/yml/parse.hpp -//#include "c4/yml/parse.hpp" -#if !defined(C4_YML_PARSE_HPP_) && !defined(_C4_YML_PARSE_HPP_) -#error "amalgamate: file c4/yml/parse.hpp must have been included at this point" -#endif /* C4_YML_PARSE_HPP_ */ - -// amalgamate: removed include of -// https://github.com/biojppm/rapidyaml/src/c4/error.hpp -//#include "c4/error.hpp" -#if !defined(C4_ERROR_HPP_) && !defined(_C4_ERROR_HPP_) -#error "amalgamate: file c4/error.hpp must have been included at this point" -#endif /* 
C4_ERROR_HPP_ */ - -// amalgamate: removed include of -// https://github.com/biojppm/rapidyaml/src/c4/utf.hpp -//#include "c4/utf.hpp" -#if !defined(C4_UTF_HPP_) && !defined(_C4_UTF_HPP_) -#error "amalgamate: file c4/utf.hpp must have been included at this point" -#endif /* C4_UTF_HPP_ */ - -// amalgamate: removed include of -// https://github.com/biojppm/rapidyaml/src/c4/dump.hpp -//#include -#if !defined(C4_DUMP_HPP_) && !defined(_C4_DUMP_HPP_) -#error "amalgamate: file c4/dump.hpp must have been included at this point" -#endif /* C4_DUMP_HPP_ */ - - -//included above: -//#include -//included above: -//#include -//included above: -//#include - -// amalgamate: removed include of -// https://github.com/biojppm/rapidyaml/src/c4/yml/detail/parser_dbg.hpp -//#include "c4/yml/detail/parser_dbg.hpp" -#if !defined(C4_YML_DETAIL_PARSER_DBG_HPP_) && !defined(_C4_YML_DETAIL_PARSER_DBG_HPP_) -#error "amalgamate: file c4/yml/detail/parser_dbg.hpp must have been included at this point" -#endif /* C4_YML_DETAIL_PARSER_DBG_HPP_ */ -#ifdef RYML_DBG -// amalgamate: removed include of -// https://github.com/biojppm/rapidyaml/src/c4/yml/detail/print.hpp -//#include "c4/yml/detail/print.hpp" -#if !defined(C4_YML_DETAIL_PRINT_HPP_) && !defined(_C4_YML_DETAIL_PRINT_HPP_) -#error "amalgamate: file c4/yml/detail/print.hpp must have been included at this point" -#endif /* C4_YML_DETAIL_PRINT_HPP_ */ +//----------------------------------------------------------------------------- +template +typename ParseEngine::ScannedScalar ParseEngine::_scan_scalar_squot() +{ + // quoted scalars can spread over multiple lines! 
+ // nice explanation here: http://yaml-multiline.info/ -#endif + // a span to the end of the file + size_t b = m_evt_handler->m_curr->pos.offset; + substr s = m_buf.sub(b); + if(s.begins_with(' ')) + { + s = s.triml(' '); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.sub(b).is_super(s)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begin() >= m_buf.sub(b).begin()); + _line_progressed((size_t)(s.begin() - m_buf.sub(b).begin())); + } + b = m_evt_handler->m_curr->pos.offset; // take this into account + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with('\'')); -#ifndef RYML_ERRMSG_SIZE - #define RYML_ERRMSG_SIZE 1024 -#endif + // skip the opening quote + _line_progressed(1); + s = s.sub(1); -//#define RYML_WITH_TAB_TOKENS -#ifdef RYML_WITH_TAB_TOKENS -#define _RYML_WITH_TAB_TOKENS(...) __VA_ARGS__ -#define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) with -#else -#define _RYML_WITH_TAB_TOKENS(...) -#define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) without -#endif + bool needs_filter = false; + size_t numlines = 1; // we already have one line + size_t pos = npos; // find the pos of the matching quote + while( ! _finished_file()) + { + const csubstr line = m_evt_handler->m_curr->line_contents.rem; + bool line_is_blank = true; + _c4dbgpf("scanning single quoted scalar @ line[{}]: ~~~{}~~~", m_evt_handler->m_curr->pos.line, line); + for(size_t i = 0; i < line.len; ++i) + { + const char curr = line.str[i]; + if(curr == '\'') // single quotes are escaped with two single quotes + { + const char next = i+1 < line.len ? 
line.str[i+1] : '~'; + if(next != '\'') // so just look for the first quote + { // without another after it + pos = i; + break; + } + else + { + needs_filter = true; // needs filter to remove escaped quotes + ++i; // skip the escaped quote + } + } + else if(curr != ' ') + { + line_is_blank = false; + } + } -#if defined(_MSC_VER) -# pragma warning(push) -# pragma warning(disable: 4296/*expression is always 'boolean_value'*/) -#elif defined(__clang__) -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wtype-limits" // to remove a warning on an assertion that a size_t >= 0. Later on, this size_t will turn into a template argument, and then it can become < 0. -# pragma clang diagnostic ignored "-Wformat-nonliteral" -#elif defined(__GNUC__) -# pragma GCC diagnostic push -# pragma GCC diagnostic ignored "-Wtype-limits" // to remove a warning on an assertion that a size_t >= 0. Later on, this size_t will turn into a template argument, and then it can become < 0. -# pragma GCC diagnostic ignored "-Wformat-nonliteral" -# if __GNUC__ >= 7 -# pragma GCC diagnostic ignored "-Wduplicated-branches" -# endif -#endif + // leading whitespace also needs filtering + needs_filter = needs_filter + || (numlines > 1) + || line_is_blank + || (_at_line_begin() && line.begins_with(' ')); -namespace c4 { -namespace yml { + if(pos == npos) + { + _line_progressed(line.len); + ++numlines; + } + else + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, pos >= 0 && pos < m_buf.len); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf[m_evt_handler->m_curr->pos.offset + pos] == '\''); + _line_progressed(pos + 1); // progress beyond the quote + pos = m_evt_handler->m_curr->pos.offset - b - 1; // but we stop before it + break; + } -namespace { + _line_ended(); + _scan_line(); + } -template -void _parse_dump(DumpFn dumpfn, c4::csubstr fmt, Args&& ...args) -{ - char writebuf[256]; - auto results = c4::format_dump_resume(dumpfn, writebuf, fmt, std::forward(args)...); - // 
resume writing if the results failed to fit the buffer - if(C4_UNLIKELY(results.bufsize > sizeof(writebuf))) // bufsize will be that of the largest element serialized. Eg int(1), will require 1 byte. + if(pos == npos) + { + _c4err("reached end of file while looking for closing quote"); + } + else { - results = format_dump_resume(dumpfn, results, writebuf, fmt, std::forward(args)...); - if(C4_UNLIKELY(results.bufsize > sizeof(writebuf))) - { - results = format_dump_resume(dumpfn, results, writebuf, fmt, std::forward(args)...); - } + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, pos > 0); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.end() >= m_buf.begin() && s.end() <= m_buf.end()); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.end() == m_buf.end() || *s.end() == '\''); + s = s.sub(0, pos-1); } -} -bool _is_scalar_next__runk(csubstr s) -{ - return !(s.begins_with(": ") || s.begins_with_any("#,{}[]%&") || s.begins_with("? ") || s == "-" || s.begins_with("- ") || s.begins_with(":\"") || s.begins_with(":'")); -} + _c4prscalar("scanned squoted scalar", s, /*keep_newlines*/true); -bool _is_scalar_next__rseq_rval(csubstr s) -{ - return !(s.begins_with_any("[{!&") || s.begins_with("? ") || s.begins_with("- ") || s == "-"); + return ScannedScalar { s, needs_filter }; } -bool _is_scalar_next__rmap(csubstr s) -{ - return !(s.begins_with(": ") || s.begins_with_any("#,!&") || s.begins_with("? ") _RYML_WITH_TAB_TOKENS(|| s.begins_with(":\t"))); -} -bool _is_scalar_next__rmap_val(csubstr s) +//----------------------------------------------------------------------------- +template +typename ParseEngine::ScannedScalar ParseEngine::_scan_scalar_dquot() { - return !(s.begins_with("- ") || s.begins_with_any("{[") || s == "-"); -} + // quoted scalars can spread over multiple lines! 
+ // nice explanation here: http://yaml-multiline.info/ -bool _is_doc_sep(csubstr s) -{ - constexpr const csubstr dashes = "---"; - constexpr const csubstr ellipsis = "..."; - constexpr const csubstr whitesp = " \t"; - if(s.begins_with(dashes)) - return s == dashes || s.sub(3).begins_with_any(whitesp); - else if(s.begins_with(ellipsis)) - return s == ellipsis || s.sub(3).begins_with_any(whitesp); - return false; -} + // a span to the end of the file + size_t b = m_evt_handler->m_curr->pos.offset; + substr s = m_buf.sub(b); + if(s.begins_with(' ')) + { + s = s.triml(' '); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.sub(b).is_super(s)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begin() >= m_buf.sub(b).begin()); + _line_progressed((size_t)(s.begin() - m_buf.sub(b).begin())); + } + b = m_evt_handler->m_curr->pos.offset; // take this into account + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with('"')); -/** @p i is set to the first non whitespace character after the line - * @return the number of empty lines after the initial position */ -size_t count_following_newlines(csubstr r, size_t *C4_RESTRICT i, size_t indentation) -{ - RYML_ASSERT(r[*i] == '\n'); - size_t numnl_following = 0; - ++(*i); - for( ; *i < r.len; ++(*i)) + // skip the opening quote + _line_progressed(1); + s = s.sub(1); + + bool needs_filter = false; + + size_t numlines = 1; // we already have one line + size_t pos = npos; // find the pos of the matching quote + while( ! 
_finished_file()) { - if(r.str[*i] == '\n') + const csubstr line = m_evt_handler->m_curr->line_contents.rem; + bool line_is_blank = true; + _c4dbgpf("scanning double quoted scalar @ line[{}]: line='{}'", m_evt_handler->m_curr->pos.line, line); + for(size_t i = 0; i < line.len; ++i) { - ++numnl_following; - if(indentation) // skip the indentation after the newline + const char curr = line.str[i]; + if(curr != ' ') + line_is_blank = false; + // every \ is an escape + if(curr == '\\') { - size_t stop = *i + indentation; - for( ; *i < r.len; ++(*i)) - { - if(r.str[*i] != ' ' && r.str[*i] != '\r') - break; - RYML_ASSERT(*i < stop); - } - C4_UNUSED(stop); + const char next = i+1 < line.len ? line.str[i+1] : '~'; + needs_filter = true; + if(next == '"' || next == '\\') + ++i; + } + else if(curr == '"') + { + pos = i; + break; } } - else if(r.str[*i] == ' ' || r.str[*i] == '\t' || r.str[*i] == '\r') // skip leading whitespace - ; + + // leading whitespace also needs filtering + needs_filter = needs_filter + || (numlines > 1) + || line_is_blank + || (_at_line_begin() && line.begins_with(' ')); + + if(pos == npos) + { + _line_progressed(line.len); + ++numlines; + } else + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, pos >= 0 && pos < m_buf.len); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf[m_evt_handler->m_curr->pos.offset + pos] == '"'); + _line_progressed(pos + 1); // progress beyond the quote + pos = m_evt_handler->m_curr->pos.offset - b - 1; // but we stop before it break; - } - return numnl_following; -} + } -} // anon namespace + _line_ended(); + _scan_line(); + } + if(pos == npos) + { + _c4err("reached end of file looking for closing quote"); + } + else + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, pos > 0); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.end() == m_buf.end() || *s.end() == '"'); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.end() >= m_buf.begin() && s.end() <= m_buf.end()); + s = s.sub(0, 
pos-1); + } -//----------------------------------------------------------------------------- + _c4prscalar("scanned dquoted scalar", s, /*keep_newlines*/true); -Parser::~Parser() -{ - _free(); - _clr(); + return ScannedScalar { s, needs_filter }; } -Parser::Parser(Callbacks const& cb, ParserOptions opts) - : m_options(opts) - , m_file() - , m_buf() - , m_root_id(NONE) - , m_tree() - , m_stack(cb) - , m_state() - , m_key_tag_indentation(0) - , m_key_tag2_indentation(0) - , m_key_tag() - , m_key_tag2() - , m_val_tag_indentation(0) - , m_val_tag() - , m_key_anchor_was_before(false) - , m_key_anchor_indentation(0) - , m_key_anchor() - , m_val_anchor_indentation(0) - , m_val_anchor() - , m_filter_arena() - , m_newline_offsets() - , m_newline_offsets_size(0) - , m_newline_offsets_capacity(0) - , m_newline_offsets_buf() -{ - m_stack.push(State{}); - m_state = &m_stack.top(); -} -Parser::Parser(Parser &&that) - : m_options(that.m_options) - , m_file(that.m_file) - , m_buf(that.m_buf) - , m_root_id(that.m_root_id) - , m_tree(that.m_tree) - , m_stack(std::move(that.m_stack)) - , m_state(&m_stack.top()) - , m_key_tag_indentation(that.m_key_tag_indentation) - , m_key_tag2_indentation(that.m_key_tag2_indentation) - , m_key_tag(that.m_key_tag) - , m_key_tag2(that.m_key_tag2) - , m_val_tag_indentation(that.m_val_tag_indentation) - , m_val_tag(that.m_val_tag) - , m_key_anchor_was_before(that.m_key_anchor_was_before) - , m_key_anchor_indentation(that.m_key_anchor_indentation) - , m_key_anchor(that.m_key_anchor) - , m_val_anchor_indentation(that.m_val_anchor_indentation) - , m_val_anchor(that.m_val_anchor) - , m_filter_arena(that.m_filter_arena) - , m_newline_offsets(that.m_newline_offsets) - , m_newline_offsets_size(that.m_newline_offsets_size) - , m_newline_offsets_capacity(that.m_newline_offsets_capacity) - , m_newline_offsets_buf(that.m_newline_offsets_buf) +//----------------------------------------------------------------------------- +template +void 
ParseEngine::_scan_block(ScannedBlock *C4_RESTRICT sb, size_t indref) { - that._clr(); -} + _c4dbgpf("blck: indref={}", indref); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, indref != npos); -Parser::Parser(Parser const& that) - : m_options(that.m_options) - , m_file(that.m_file) - , m_buf(that.m_buf) - , m_root_id(that.m_root_id) - , m_tree(that.m_tree) - , m_stack(that.m_stack) - , m_state(&m_stack.top()) - , m_key_tag_indentation(that.m_key_tag_indentation) - , m_key_tag2_indentation(that.m_key_tag2_indentation) - , m_key_tag(that.m_key_tag) - , m_key_tag2(that.m_key_tag2) - , m_val_tag_indentation(that.m_val_tag_indentation) - , m_val_tag(that.m_val_tag) - , m_key_anchor_was_before(that.m_key_anchor_was_before) - , m_key_anchor_indentation(that.m_key_anchor_indentation) - , m_key_anchor(that.m_key_anchor) - , m_val_anchor_indentation(that.m_val_anchor_indentation) - , m_val_anchor(that.m_val_anchor) - , m_filter_arena() - , m_newline_offsets() - , m_newline_offsets_size() - , m_newline_offsets_capacity() - , m_newline_offsets_buf() -{ - if(that.m_newline_offsets_capacity) + // nice explanation here: http://yaml-multiline.info/ + csubstr s = m_evt_handler->m_curr->line_contents.rem; + csubstr trimmed = s.triml(' '); + if(trimmed.str > s.str) { - _resize_locations(that.m_newline_offsets_capacity); - _RYML_CB_CHECK(m_stack.m_callbacks, m_newline_offsets_capacity == that.m_newline_offsets_capacity); - memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size * sizeof(size_t)); - m_newline_offsets_size = that.m_newline_offsets_size; + _c4dbgp("skipping whitespace"); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, trimmed.str >= s.str); + _line_progressed(static_cast(trimmed.str - s.str)); + s = trimmed; + } + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with('|') || s.begins_with('>')); + + _c4dbgpf("blck: specs=[{}]~~~{}~~~", s.len, s); + + // parse the spec + BlockChomp_e chomp = CHOMP_CLIP; // default to clip 
unless + or - are used + size_t indentation = npos; // have to find out if no spec is given + csubstr digits; + if(s.len > 1) + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with_any("|>")); + csubstr t = s.sub(1); + _c4dbgpf("blck: spec is multichar: '{}'", t); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, t.len >= 1); + size_t pos = t.first_of("-+"); + _c4dbgpf("blck: spec chomp char at {}", pos); + if(pos != npos) + { + if(t[pos] == '-') + chomp = CHOMP_STRIP; + else if(t[pos] == '+') + chomp = CHOMP_KEEP; + if(pos == 0) + t = t.sub(1); + else + t = t.first(pos); + } + // from here to the end, only digits are considered + digits = t.left_of(t.first_not_of("0123456789")); + if( ! digits.empty()) + { + if(C4_UNLIKELY(digits.len > 1)) + _c4err("parse error: invalid indentation"); + _c4dbgpf("blck: parse indentation digits: [{}]~~~{}~~~", digits.len, digits); + if(C4_UNLIKELY( ! c4::atou(digits, &indentation))) + _c4err("parse error: could not read indentation as decimal"); + if(C4_UNLIKELY( ! indentation)) + _c4err("parse error: null indentation"); + _c4dbgpf("blck: indentation specified: {}. add {} from curr state -> {}", indentation, m_evt_handler->m_curr->indref, indentation+indref); + indentation += m_evt_handler->m_curr->indref; + } + } + + _c4dbgpf("blck: style={} chomp={} indentation={}", s.begins_with('>') ? "fold" : "literal", chomp==CHOMP_CLIP ? "clip" : (chomp==CHOMP_STRIP ? "strip" : "keep"), indentation); + + // finish the current line + _line_progressed(s.len); + _line_ended(); + _scan_line(); + + // start with a zero-length block, already pointing at the right place + substr raw_block(m_buf.data() + m_evt_handler->m_curr->pos.offset, size_t(0));// m_evt_handler->m_curr->line_contents.full.sub(0, 0); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, raw_block.begin() == m_evt_handler->m_curr->line_contents.full.begin()); + + // read every full line into a raw block, + // from which newlines are to be stripped as needed. 
+ // + // If no explicit indentation was given, pick it from the first + // non-empty line. See + // https://yaml.org/spec/1.2.2/#8111-block-indentation-indicator + size_t num_lines = 0; + size_t first = m_evt_handler->m_curr->pos.line; + size_t provisional_indentation = npos; + LineContents lc; + while(( ! _finished_file())) + { + // peek next line, but do not advance immediately + lc.reset_with_next_line(m_buf, m_evt_handler->m_curr->pos.offset); + _c4dbgpf("blck: peeking at [{}]~~~{}~~~", lc.stripped.len, lc.stripped); + // evaluate termination conditions + if(indentation != npos) + { + _c4dbgpf("blck: indentation={}", indentation); + // stop when the line is deindented and not empty + if(lc.indentation < indentation && ( ! lc.rem.trim(" \t").empty())) + { + if(raw_block.len) + { + _c4dbgpf("blck: indentation decreased ref={} thisline={}", indentation, lc.indentation); + } + else + { + _c4err("indentation decreased without any scalar"); + } + break; + } + else if(indentation == 0) + { + _c4dbgpf("blck: noindent. lc.rem=[{}]~~~{}~~~", lc.rem.len, lc.rem); + if(_is_doc_token(lc.rem)) + { + _c4dbgp("blck: stop. indentation=0 and doc ended"); + break; + } + } + } + else + { + const size_t fns = lc.stripped.first_not_of(' '); + _c4dbgpf("blck: indentation ref not set. firstnonws={}", fns); + if(fns != npos) // non-empty line + { + _RYML_WITH_TAB_TOKENS( + if(C4_UNLIKELY(lc.stripped.begins_with('\t'))) + _c4err("parse error"); + ) + _c4dbgpf("blck: line not empty. indref={} indprov={} indentation={}", indref, provisional_indentation, lc.indentation); + if(provisional_indentation == npos) + { + if(lc.indentation < indref) + { + _c4dbgpf("blck: block terminated indentation={} < indref={}", lc.indentation, indref); + if(raw_block.len == 0) + { + _c4dbgp("blck: was empty, undo next line"); + _line_ended_undo(); + } + break; + } + else if(lc.indentation == m_evt_handler->m_curr->indref) + { + if(has_any(RSEQ|RMAP)) + { + _c4dbgpf("blck: block terminated. 
reading container and indentation={}==indref={}", lc.indentation, m_evt_handler->m_curr->indref); + break; + } + } + _c4dbgpf("blck: set indentation ref from this line: ref={}", lc.indentation); + indentation = lc.indentation; + } + else + { + if(lc.indentation >= provisional_indentation) + { + _c4dbgpf("blck: set indentation ref from provisional indentation: provisional_ref={}, thisline={}", provisional_indentation, lc.indentation); + //indentation = provisional_indentation ? provisional_indentation : lc.indentation; + indentation = lc.indentation; + } + else + { + break; + //_c4err("parse error: first non-empty block line should have at least the original indentation"); + } + } + } + else // empty line + { + _c4dbgpf("blck: line empty or {} spaces. line_indentation={} prov_indentation={}", lc.stripped.len, lc.indentation, provisional_indentation); + if(provisional_indentation != npos) + { + if(lc.stripped.len >= provisional_indentation) + { + _c4dbgpf("blck: increase provisional_ref {} -> {}", provisional_indentation, lc.stripped.len); + provisional_indentation = lc.stripped.len; + } + #ifdef RYML_NO_COVERAGE__TO_BE_DELETED + else if(lc.indentation >= provisional_indentation && lc.indentation != npos) + { + _c4dbgpf("blck: increase provisional_ref {} -> {}", provisional_indentation, lc.indentation); + provisional_indentation = lc.indentation; + } + #endif + } + else + { + provisional_indentation = lc.indentation ? lc.indentation : has_any(RSEQ|RVAL); + _c4dbgpf("blck: initialize provisional_ref={}", provisional_indentation); + if(provisional_indentation == npos) + { + provisional_indentation = lc.stripped.len ? 
lc.stripped.len : has_any(RSEQ|RVAL); + _c4dbgpf("blck: initialize provisional_ref={}", provisional_indentation); + } + if(provisional_indentation < indref) + { + provisional_indentation = indref; + _c4dbgpf("blck: initialize provisional_ref={}", provisional_indentation); + } + } + } + } + // advance now that we know the folded scalar continues + m_evt_handler->m_curr->line_contents = lc; + _c4dbgpf("blck: append '{}'", m_evt_handler->m_curr->line_contents.rem); + raw_block.len += m_evt_handler->m_curr->line_contents.full.len; + _line_progressed(m_evt_handler->m_curr->line_contents.rem.len); + _line_ended(); + ++num_lines; } - if(that.m_filter_arena.len) + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.line == (first + num_lines) || (raw_block.len == 0)); + C4_UNUSED(num_lines); + C4_UNUSED(first); + + if(indentation == npos) { - _resize_filter_arena(that.m_filter_arena.len); + _c4dbgpf("blck: set indentation from provisional: {}", provisional_indentation); + indentation = provisional_indentation; } -} -Parser& Parser::operator=(Parser &&that) -{ - _free(); - m_options = (that.m_options); - m_file = (that.m_file); - m_buf = (that.m_buf); - m_root_id = (that.m_root_id); - m_tree = (that.m_tree); - m_stack = std::move(that.m_stack); - m_state = (&m_stack.top()); - m_key_tag_indentation = (that.m_key_tag_indentation); - m_key_tag2_indentation = (that.m_key_tag2_indentation); - m_key_tag = (that.m_key_tag); - m_key_tag2 = (that.m_key_tag2); - m_val_tag_indentation = (that.m_val_tag_indentation); - m_val_tag = (that.m_val_tag); - m_key_anchor_was_before = (that.m_key_anchor_was_before); - m_key_anchor_indentation = (that.m_key_anchor_indentation); - m_key_anchor = (that.m_key_anchor); - m_val_anchor_indentation = (that.m_val_anchor_indentation); - m_val_anchor = (that.m_val_anchor); - m_filter_arena = that.m_filter_arena; - m_newline_offsets = (that.m_newline_offsets); - m_newline_offsets_size = (that.m_newline_offsets_size); - 
m_newline_offsets_capacity = (that.m_newline_offsets_capacity); - m_newline_offsets_buf = (that.m_newline_offsets_buf); - that._clr(); - return *this; -} + if(num_lines) + _line_ended_undo(); -Parser& Parser::operator=(Parser const& that) -{ - _free(); - m_options = (that.m_options); - m_file = (that.m_file); - m_buf = (that.m_buf); - m_root_id = (that.m_root_id); - m_tree = (that.m_tree); - m_stack = that.m_stack; - m_state = &m_stack.top(); - m_key_tag_indentation = (that.m_key_tag_indentation); - m_key_tag2_indentation = (that.m_key_tag2_indentation); - m_key_tag = (that.m_key_tag); - m_key_tag2 = (that.m_key_tag2); - m_val_tag_indentation = (that.m_val_tag_indentation); - m_val_tag = (that.m_val_tag); - m_key_anchor_was_before = (that.m_key_anchor_was_before); - m_key_anchor_indentation = (that.m_key_anchor_indentation); - m_key_anchor = (that.m_key_anchor); - m_val_anchor_indentation = (that.m_val_anchor_indentation); - m_val_anchor = (that.m_val_anchor); - if(that.m_filter_arena.len > 0) - _resize_filter_arena(that.m_filter_arena.len); - if(that.m_newline_offsets_capacity > m_newline_offsets_capacity) - _resize_locations(that.m_newline_offsets_capacity); - _RYML_CB_CHECK(m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_capacity); - _RYML_CB_CHECK(m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_size); - memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size * sizeof(size_t)); - m_newline_offsets_size = that.m_newline_offsets_size; - m_newline_offsets_buf = that.m_newline_offsets_buf; - return *this; + _c4prscalar("scanned block", raw_block, /*keep_newlines*/true); + + sb->scalar = raw_block; + sb->indentation = indentation; + sb->chomp = chomp; } -void Parser::_clr() + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- 
+//----------------------------------------------------------------------------- + +// a debugging scaffold: +#if 0 +#define _c4dbgfws(fmt, ...) _c4dbgpf("filt_ws[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__) +#else +#define _c4dbgfws(...) +#endif + +template +template +bool ParseEngine::_filter_ws_handle_to_first_non_space(FilterProcessor &proc) { - m_options = {}; - m_file = {}; - m_buf = {}; - m_root_id = {}; - m_tree = {}; - m_stack.clear(); - m_state = {}; - m_key_tag_indentation = {}; - m_key_tag2_indentation = {}; - m_key_tag = {}; - m_key_tag2 = {}; - m_val_tag_indentation = {}; - m_val_tag = {}; - m_key_anchor_was_before = {}; - m_key_anchor_indentation = {}; - m_key_anchor = {}; - m_val_anchor_indentation = {}; - m_val_anchor = {}; - m_filter_arena = {}; - m_newline_offsets = {}; - m_newline_offsets_size = {}; - m_newline_offsets_capacity = {}; - m_newline_offsets_buf = {}; + _c4dbgfws("found whitespace '{}'", _c4prc(proc.curr())); + _RYML_CB_ASSERT(this->callbacks(), proc.curr() == ' ' || proc.curr() == '\t'); + + const size_t first_pos = proc.rpos > 0 ? proc.src.first_not_of(" \t", proc.rpos) : proc.src.first_not_of(' ', proc.rpos); + if(first_pos != npos) + { + const char first_char = proc.src[first_pos]; + _c4dbgfws("firstnonws='{}'@{}", _c4prc(first_char), first_pos); + if(first_char == '\n' || first_char == '\r') // skip trailing whitespace + { + _c4dbgfws("whitespace is trailing on line", ""); + proc.skip(first_pos - proc.rpos); + } + else // a legit whitespace + { + proc.copy(); + _c4dbgfws("legit whitespace. 
sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar()); + } + return true; + } + _c4dbgfws("whitespace is trailing on line", ""); + return false; } -void Parser::_free() +template +template +void ParseEngine::_filter_ws_copy_trailing(FilterProcessor &proc) { - if(m_newline_offsets) + if(!_filter_ws_handle_to_first_non_space(proc)) { - _RYML_CB_FREE(m_stack.m_callbacks, m_newline_offsets, size_t, m_newline_offsets_capacity); - m_newline_offsets = nullptr; - m_newline_offsets_size = 0u; - m_newline_offsets_capacity = 0u; - m_newline_offsets_buf = 0u; + _c4dbgfws("... everything else is trailing whitespace - copy {} chars", proc.src.len - proc.rpos); + proc.copy(proc.src.len - proc.rpos); } - if(m_filter_arena.len) +} + +template +template +void ParseEngine::_filter_ws_skip_trailing(FilterProcessor &proc) +{ + if(!_filter_ws_handle_to_first_non_space(proc)) { - _RYML_CB_FREE(m_stack.m_callbacks, m_filter_arena.str, char, m_filter_arena.len); - m_filter_arena = {}; + _c4dbgfws("... everything else is trailing whitespace - skip {} chars", proc.src.len - proc.rpos); + proc.skip(proc.src.len - proc.rpos); } - m_stack._free(); } +#undef _c4dbgfws + //----------------------------------------------------------------------------- -void Parser::_reset() -{ - _RYML_CB_ASSERT(m_stack.m_callbacks, m_stack.size() == 1); - m_stack.clear(); - m_stack.push({}); - m_state = &m_stack.top(); - m_state->reset(m_file.str, m_root_id); +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +/* plain scalars */ + +// a debugging scaffold: +#if 0 +#define _c4dbgfps(fmt, ...) _c4dbgpf("filt_plain[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__) +#else +#define _c4dbgfps(fmt, ...) 
+#endif - m_key_tag_indentation = 0; - m_key_tag2_indentation = 0; - m_key_tag.clear(); - m_key_tag2.clear(); - m_val_tag_indentation = 0; - m_val_tag.clear(); - m_key_anchor_was_before = false; - m_key_anchor_indentation = 0; - m_key_anchor.clear(); - m_val_anchor_indentation = 0; - m_val_anchor.clear(); +template +template +void ParseEngine::_filter_nl_plain(FilterProcessor &C4_RESTRICT proc, size_t indentation) +{ + _RYML_CB_ASSERT(this->callbacks(), proc.curr() == '\n'); - if(m_options.locations()) + _c4dbgfps("found newline. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar()); + size_t ii = proc.rpos; + const size_t numnl_following = _count_following_newlines(proc.src, &ii, indentation); + if(numnl_following) { - _prepare_locations(); + proc.set('\n', numnl_following); + _c4dbgfps("{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ? "in the middle" : "at the end", proc.rpos-ii); + } + else + { + const size_t ret = proc.src.first_not_of(" \t", proc.rpos+1); + if(ret != npos) + { + proc.set(' '); + _c4dbgfps("single newline. convert to space. ret={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar()); + } + else + { + _c4dbgfps("last newline, everything else is whitespace. 
ii={}/{}", ii, proc.src.len); + ii = proc.src.len; + } } + proc.rpos = ii; } -//----------------------------------------------------------------------------- -template -void Parser::_fmt_msg(DumpFn &&dumpfn) const +template +template +auto ParseEngine::_filter_plain(FilterProcessor &C4_RESTRICT proc, size_t indentation) -> decltype(proc.result()) { - auto const& lc = m_state->line_contents; - csubstr contents = lc.stripped; - if(contents.len) + _RYML_CB_ASSERT(this->callbacks(), indentation != npos); + _c4dbgfps("before=[{}]~~~{}~~~", proc.src.len, proc.src); + + while(proc.has_more_chars()) { - // print the yaml src line - size_t offs = 3u + to_chars(substr{}, m_state->pos.line) + to_chars(substr{}, m_state->pos.col); - if(m_file.len) + const char curr = proc.curr(); + _c4dbgfps("'{}', sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar()); + switch(curr) { - _parse_dump(dumpfn, "{}:", m_file); - offs += m_file.len + 1; + case ' ': + _RYML_WITH_TAB_TOKENS(case '\t':) + _c4dbgfps("whitespace", curr); + _filter_ws_skip_trailing(proc); + break; + case '\n': + _c4dbgfps("newline", curr); + _filter_nl_plain(proc, /*indentation*/indentation); + break; + case '\r': // skip \r --- https://stackoverflow.com/questions/1885900 + _c4dbgfps("carriage return, ignore", curr); + proc.skip(); + break; + default: + proc.copy(); + break; } - _parse_dump(dumpfn, "{}:{}: ", m_state->pos.line, m_state->pos.col); - csubstr maybe_full_content = (contents.len < 80u ? contents : contents.first(80u)); - csubstr maybe_ellipsis = (contents.len < 80u ? csubstr{} : csubstr("...")); - _parse_dump(dumpfn, "{}{} (size={})\n", maybe_full_content, maybe_ellipsis, contents.len); - // highlight the remaining portion of the previous line - size_t firstcol = (size_t)(lc.rem.begin() - lc.full.begin()); - size_t lastcol = firstcol + lc.rem.len; - for(size_t i = 0; i < offs + firstcol; ++i) - dumpfn(" "); - dumpfn("^"); - for(size_t i = 1, e = (lc.rem.len < 80u ? 
lc.rem.len : 80u); i < e; ++i) - dumpfn("~"); - _parse_dump(dumpfn, "{} (cols {}-{})\n", maybe_ellipsis, firstcol+1, lastcol+1); - } - else - { - dumpfn("\n"); } -#ifdef RYML_DBG - // next line: print the state flags - { - char flagbuf_[64]; - _parse_dump(dumpfn, "top state: {}\n", _prfl(flagbuf_, m_state->flags)); - } -#endif + _c4dbgfps("after[{}]=~~~{}~~~", proc.wpos, proc.sofar()); + + return proc.result(); } +#undef _c4dbgfps -//----------------------------------------------------------------------------- -template -void Parser::_err(csubstr fmt, Args const& C4_RESTRICT ...args) const + +template +FilterResult ParseEngine::filter_scalar_plain(csubstr scalar, substr dst, size_t indentation) { - char errmsg[RYML_ERRMSG_SIZE]; - detail::_SubstrWriter writer(errmsg); - auto dumpfn = [&writer](csubstr s){ writer.append(s); }; - _parse_dump(dumpfn, fmt, args...); - writer.append('\n'); - _fmt_msg(dumpfn); - size_t len = writer.pos < RYML_ERRMSG_SIZE ? writer.pos : RYML_ERRMSG_SIZE; - m_tree->m_callbacks.m_error(errmsg, len, m_state->pos, m_tree->m_callbacks.m_user_data); + FilterProcessorSrcDst proc(scalar, dst); + return _filter_plain(proc, indentation); } -//----------------------------------------------------------------------------- -#ifdef RYML_DBG -template -void Parser::_dbg(csubstr fmt, Args const& C4_RESTRICT ...args) const +template +FilterResult ParseEngine::filter_scalar_plain_in_place(substr dst, size_t cap, size_t indentation) { - auto dumpfn = [](csubstr s){ fwrite(s.str, 1, s.len, stdout); }; - _parse_dump(dumpfn, fmt, args...); - dumpfn("\n"); - _fmt_msg(dumpfn); + FilterProcessorInplaceEndExtending proc(dst, cap); + return _filter_plain(proc, indentation); } -#endif + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- //----------------------------------------------------------------------------- -bool Parser::_finished_file() const +/* 
single quoted */ + +// a debugging scaffold: +#if 0 +#define _c4dbgfsq(fmt, ...) _c4dbgpf("filt_squo[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__) +#else +#define _c4dbgfsq(fmt, ...) +#endif + +template +template +void ParseEngine::_filter_nl_squoted(FilterProcessor &C4_RESTRICT proc) { - bool ret = m_state->pos.offset >= m_buf.len; - if(ret) + _RYML_CB_ASSERT(this->callbacks(), proc.curr() == '\n'); + + _c4dbgfsq("found newline. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar()); + size_t ii = proc.rpos; + const size_t numnl_following = _count_following_newlines(proc.src, &ii); + if(numnl_following) { - _c4dbgp("finished file!!!"); + proc.set('\n', numnl_following); + _c4dbgfsq("{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ? "in the middle" : "at the end", proc.rpos-ii); } - return ret; + else + { + const size_t ret = proc.src.first_not_of(" \t", proc.rpos+1); + if(ret != npos) + { + proc.set(' '); + _c4dbgfsq("single newline. convert to space. ret={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar()); + } + else + { + proc.set(' '); + _c4dbgfsq("single newline. convert to space. ii={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar()); + } + } + proc.rpos = ii; } -//----------------------------------------------------------------------------- -bool Parser::_finished_line() const +template +template +auto ParseEngine::_filter_squoted(FilterProcessor &C4_RESTRICT proc) -> decltype(proc.result()) { - return m_state->line_contents.rem.empty(); -} + _c4dbgfsq("before=[{}]~~~{}~~~", proc.src.len, proc.src); -//----------------------------------------------------------------------------- -void Parser::parse_in_place(csubstr file, substr buf, Tree *t, size_t node_id) -{ - m_file = file; - m_buf = buf; - m_root_id = node_id; - m_tree = t; - _reset(); - while( ! 
_finished_file()) + // from the YAML spec for double-quoted scalars: + // https://yaml.org/spec/1.2-old/spec.html#style/flow/single-quoted + while(proc.has_more_chars()) { - _scan_line(); - while( ! _finished_line()) - _handle_line(); - if(_finished_file()) - break; // it may have finished because of multiline blocks - _line_ended(); + const char curr = proc.curr(); + _c4dbgfsq("'{}', sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar()); + switch(curr) + { + case ' ': + case '\t': + _c4dbgfsq("whitespace", curr); + _filter_ws_copy_trailing(proc); + break; + case '\n': + _c4dbgfsq("newline", curr); + _filter_nl_squoted(proc); + break; + case '\r': // skip \r --- https://stackoverflow.com/questions/1885900 + _c4dbgfsq("skip cr", curr); + proc.skip(); + break; + case '\'': + _c4dbgfsq("squote", curr); + if(proc.next() == '\'') + { + _c4dbgfsq("two consecutive squotes", curr); + proc.skip(); + proc.copy(); + } + else + { + _c4err("filter error"); + } + break; + default: + proc.copy(); + break; + } } - _handle_finished_file(); + + _c4dbgfsq(": #filteredchars={} after=~~~[{}]{}~~~", proc.src.len-proc.sofar().len, proc.sofar().len, proc.sofar()); + + return proc.result(); } -//----------------------------------------------------------------------------- -void Parser::_handle_finished_file() +#undef _c4dbgfsq + +template +FilterResult ParseEngine::filter_scalar_squoted(csubstr scalar, substr dst) { - _end_stream(); + FilterProcessorSrcDst proc(scalar, dst); + return _filter_squoted(proc); } +template +FilterResult ParseEngine::filter_scalar_squoted_in_place(substr dst, size_t cap) +{ + FilterProcessorInplaceEndExtending proc(dst, cap); + return _filter_squoted(proc); +} + + //----------------------------------------------------------------------------- -void Parser::_handle_line() +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +/* double quoted */ + +// 
a debugging scaffold: +#if 0 +#define _c4dbgfdq(fmt, ...) _c4dbgpf("filt_dquo[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__) +#else +#define _c4dbgfdq(...) +#endif + +template +template +void ParseEngine::_filter_nl_dquoted(FilterProcessor &C4_RESTRICT proc) { - _c4dbgq("\n-----------"); - _c4dbgt("handling line={}, offset={}B", m_state->pos.line, m_state->pos.offset); - _RYML_CB_ASSERT(m_stack.m_callbacks, ! m_state->line_contents.rem.empty()); - if(has_any(RSEQ)) + _RYML_CB_ASSERT(this->callbacks(), proc.curr() == '\n'); + + _c4dbgfdq("found newline. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar()); + size_t ii = proc.rpos; + const size_t numnl_following = _count_following_newlines(proc.src, &ii); + if(numnl_following) { - if(has_any(FLOW)) + proc.set('\n', numnl_following); + _c4dbgfdq("{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ? "in the middle" : "at the end", proc.rpos-ii); + } + else + { + const size_t ret = proc.src.first_not_of(" \t", proc.rpos+1); + if(ret != npos) { - if(_handle_seq_flow()) - return; + proc.set(' '); + _c4dbgfdq("single newline. convert to space. ret={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar()); } else { - if(_handle_seq_blck()) - return; + proc.set(' '); + _c4dbgfdq("single newline. convert to space. ii={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar()); + } + if(ii < proc.src.len && proc.src.str[ii] == '\\') + { + _c4dbgfdq("backslash at [{}]", ii); + const char next = ii+1 < proc.src.len ? 
proc.src.str[ii+1] : '\0'; + if(next == ' ' || next == '\t') + { + _c4dbgfdq("extend skip to backslash", ""); + ++ii; + } } } - else if(has_any(RMAP)) + proc.rpos = ii; +} + +template +template +void ParseEngine::_filter_dquoted_backslash(FilterProcessor &C4_RESTRICT proc) +{ + char next = proc.next(); + _c4dbgfdq("backslash, next='{}'", _c4prc(next)); + if(next == '\r') { - if(has_any(FLOW)) + if(proc.rpos+2 < proc.src.len && proc.src.str[proc.rpos+2] == '\n') { - if(_handle_map_flow()) - return; + proc.skip(); // newline escaped with \ -- skip both (add only one as i is loop-incremented) + next = '\n'; + _c4dbgfdq("[{}]: was \\r\\n, now next='\\n'", proc.rpos); } - else + } + + if(next == '\n') + { + size_t ii = proc.rpos + 2; + for( ; ii < proc.src.len; ++ii) { - if(_handle_map_blck()) - return; + // skip leading whitespace + if(proc.src.str[ii] == ' ' || proc.src.str[ii] == '\t') + ; + else + break; } + proc.skip(ii - proc.rpos); } - else if(has_any(RUNK)) + else if(next == '"' || next == '/' || next == ' ' || next == '\t') { - if(_handle_unk()) - return; + // escapes for json compatibility + proc.translate_esc(next); + _c4dbgfdq("here, used '{}'", _c4prc(next)); } - - if(_handle_top()) - return; -} - - -//----------------------------------------------------------------------------- -bool Parser::_handle_unk() -{ - _c4dbgp("handle_unk"); - - csubstr rem = m_state->line_contents.rem; - const bool start_as_child = (node(m_state) == nullptr); - - if(C4_UNLIKELY(has_any(NDOC))) + else if(next == '\r') + { + proc.skip(); + } + else if(next == 'n') + { + proc.translate_esc('\n'); + } + else if(next == 'r') + { + proc.translate_esc('\r'); + } + else if(next == 't') + { + proc.translate_esc('\t'); + } + else if(next == '\\') + { + proc.translate_esc('\\'); + } + else if(next == 'x') // UTF8 + { + if(C4_UNLIKELY(proc.rpos + 1u + 2u >= proc.src.len)) + _c4err("\\x requires 2 hex digits. 
scalar pos={}", proc.rpos); + csubstr codepoint = proc.src.sub(proc.rpos + 2u, 2u); + _c4dbgfdq("utf8 ~~~{}~~~ rpos={} rem=~~~{}~~~", codepoint, proc.rpos, proc.src.sub(proc.rpos)); + uint8_t byteval = {}; + if(C4_UNLIKELY(!read_hex(codepoint, &byteval))) + _c4err("failed to read \\x codepoint. scalar pos={}", proc.rpos); + proc.translate_esc_bulk((const char*)&byteval, 1u, /*nread*/3u); + _c4dbgfdq("utf8 after rpos={} rem=~~~{}~~~", proc.rpos, proc.src.sub(proc.rpos)); + } + else if(next == 'u') // UTF16 + { + if(C4_UNLIKELY(proc.rpos + 1u + 4u >= proc.src.len)) + _c4err("\\u requires 4 hex digits. scalar pos={}", proc.rpos); + char readbuf[8]; + csubstr codepoint = proc.src.sub(proc.rpos + 2u, 4u); + uint32_t codepoint_val = {}; + if(C4_UNLIKELY(!read_hex(codepoint, &codepoint_val))) + _c4err("failed to parse \\u codepoint. scalar pos={}", proc.rpos); + const size_t numbytes = decode_code_point((uint8_t*)readbuf, sizeof(readbuf), codepoint_val); + if(C4_UNLIKELY(numbytes == 0)) + _c4err("failed to decode code point={}", proc.rpos); + _RYML_CB_ASSERT(callbacks(), numbytes <= 4); + proc.translate_esc_bulk(readbuf, numbytes, /*nread*/5u); + } + else if(next == 'U') // UTF32 + { + if(C4_UNLIKELY(proc.rpos + 1u + 8u >= proc.src.len)) + _c4err("\\U requires 8 hex digits. scalar pos={}", proc.rpos); + char readbuf[8]; + csubstr codepoint = proc.src.sub(proc.rpos + 2u, 8u); + uint32_t codepoint_val = {}; + if(C4_UNLIKELY(!read_hex(codepoint, &codepoint_val))) + _c4err("failed to parse \\U codepoint. 
scalar pos={}", proc.rpos); + const size_t numbytes = decode_code_point((uint8_t*)readbuf, sizeof(readbuf), codepoint_val); + if(C4_UNLIKELY(numbytes == 0)) + _c4err("failed to decode code point={}", proc.rpos); + _RYML_CB_ASSERT(callbacks(), numbytes <= 4); + proc.translate_esc_bulk(readbuf, numbytes, /*nread*/9u); + } + // https://yaml.org/spec/1.2.2/#rule-c-ns-esc-char + else if(next == '0') { - if(rem == "---" || rem.begins_with("--- ")) - { - _start_new_doc(rem); - return true; - } - auto trimmed = rem.triml(' '); - if(trimmed == "---" || trimmed.begins_with("--- ")) - { - _RYML_CB_ASSERT(m_stack.m_callbacks, rem.len >= trimmed.len); - _line_progressed(rem.len - trimmed.len); - _start_new_doc(trimmed); - _save_indentation(); - return true; - } - else if(trimmed.begins_with("...")) - { - _end_stream(); - } - else if(trimmed.first_of("#%") == csubstr::npos) // neither a doc nor a tag - { - _c4dbgpf("starting implicit doc to accomodate unexpected tokens: '{}'", rem); - size_t indref = m_state->indref; - _push_level(); - _start_doc(); - _set_indentation(indref); - } - _RYML_CB_ASSERT(m_stack.m_callbacks, !trimmed.empty()); + proc.translate_esc('\0'); } - - _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RNXT|RSEQ|RMAP)); - if(m_state->indref > 0) + else if(next == 'b') // backspace { - csubstr ws = rem.left_of(rem.first_not_of(' ')); - if(m_state->indref <= ws.len) - { - _c4dbgpf("skipping base indentation of {}", m_state->indref); - _line_progressed(m_state->indref); - rem = rem.sub(m_state->indref); - } + proc.translate_esc('\b'); } - - if(rem.begins_with("- ") _RYML_WITH_TAB_TOKENS( || rem.begins_with("-\t"))) + else if(next == 'f') // form feed { - _c4dbgpf("it's a seq (as_child={})", start_as_child); - _move_key_anchor_to_val_anchor(); - _move_key_tag_to_val_tag(); - _push_level(); - _start_seq(start_as_child); - _save_indentation(); - _line_progressed(2); - return true; + proc.translate_esc('\f'); } - else if(rem == '-') + else if(next == 'a') // bell 
character { - _c4dbgpf("it's a seq (as_child={})", start_as_child); - _move_key_anchor_to_val_anchor(); - _move_key_tag_to_val_tag(); - _push_level(); - _start_seq(start_as_child); - _save_indentation(); - _line_progressed(1); - return true; + proc.translate_esc('\a'); } - else if(rem.begins_with('[')) + else if(next == 'v') // vertical tab { - _c4dbgpf("it's a seq, flow (as_child={})", start_as_child); - _move_key_anchor_to_val_anchor(); - _move_key_tag_to_val_tag(); - _push_level(/*explicit flow*/true); - _start_seq(start_as_child); - add_flags(FLOW); - _line_progressed(1); - return true; + proc.translate_esc('\v'); } - else if(rem.begins_with('{')) + else if(next == 'e') // escape character { - _c4dbgpf("it's a map, flow (as_child={})", start_as_child); - _move_key_anchor_to_val_anchor(); - _move_key_tag_to_val_tag(); - _push_level(/*explicit flow*/true); - _start_map(start_as_child); - addrem_flags(FLOW|RKEY, RVAL); - _line_progressed(1); - return true; + proc.translate_esc('\x1b'); } - else if(rem.begins_with("? 
")) + else if(next == '_') // unicode non breaking space \u00a0 { - _c4dbgpf("it's a map (as_child={}) + this key is complex", start_as_child); - _move_key_anchor_to_val_anchor(); - _move_key_tag_to_val_tag(); - _push_level(); - _start_map(start_as_child); - addrem_flags(RKEY|QMRK, RVAL); - _save_indentation(); - _line_progressed(2); - return true; + // https://www.compart.com/en/unicode/U+00a0 + const char payload[] = { + _RYML_CHCONST(-0x3e, 0xc2), + _RYML_CHCONST(-0x60, 0xa0), + }; + proc.translate_esc_bulk(payload, /*nwrite*/2, /*nread*/1); } - else if(rem.begins_with(": ") && !has_all(SSCL)) + else if(next == 'N') // unicode next line \u0085 { - _c4dbgp("it's a map with an empty key"); - _move_key_anchor_to_val_anchor(); - _move_key_tag_to_val_tag(); - _push_level(); - _start_map(start_as_child); - _store_scalar_null(rem.str); - addrem_flags(RVAL, RKEY); - _save_indentation(); - _line_progressed(2); - return true; + // https://www.compart.com/en/unicode/U+0085 + const char payload[] = { + _RYML_CHCONST(-0x3e, 0xc2), + _RYML_CHCONST(-0x7b, 0x85), + }; + proc.translate_esc_bulk(payload, /*nwrite*/2, /*nread*/1); } - else if(rem == ':' && !has_all(SSCL)) + else if(next == 'L') // unicode line separator \u2028 { - _c4dbgp("it's a map with an empty key"); - _move_key_anchor_to_val_anchor(); - _move_key_tag_to_val_tag(); - _push_level(); - _start_map(start_as_child); - _store_scalar_null(rem.str); - addrem_flags(RVAL, RKEY); - _save_indentation(); - _line_progressed(1); - return true; + // https://www.utf8-chartable.de/unicode-utf8-table.pl?start=8192&number=1024&names=-&utf8=0x&unicodeinhtml=hex + const char payload[] = { + _RYML_CHCONST(-0x1e, 0xe2), + _RYML_CHCONST(-0x80, 0x80), + _RYML_CHCONST(-0x58, 0xa8), + }; + proc.translate_esc_extending(payload, /*nwrite*/3, /*nread*/1); } - else if(_handle_types()) + else if(next == 'P') // unicode paragraph separator \u2029 { - return true; + // 
https://www.utf8-chartable.de/unicode-utf8-table.pl?start=8192&number=1024&names=-&utf8=0x&unicodeinhtml=hex + const char payload[] = { + _RYML_CHCONST(-0x1e, 0xe2), + _RYML_CHCONST(-0x80, 0x80), + _RYML_CHCONST(-0x57, 0xa9), + }; + proc.translate_esc_extending(payload, /*nwrite*/3, /*nread*/1); } - else if(!rem.begins_with('*') && _handle_key_anchors_and_refs()) + else if(next == '\0') { - return true; + proc.skip(); } - else if(has_all(SSCL)) + else { - _c4dbgpf("there's a stored scalar: '{}'", m_state->scalar); - - csubstr saved_scalar; - bool is_quoted; - if(_scan_scalar_unk(&saved_scalar, &is_quoted)) - { - rem = m_state->line_contents.rem; - _c4dbgpf("... and there's also a scalar next! '{}'", saved_scalar); - if(rem.begins_with_any(" \t")) - { - size_t n = rem.first_not_of(" \t"); - _c4dbgpf("skipping {} spaces/tabs", n); - rem = rem.sub(n); - _line_progressed(n); - } - } + _c4err("unknown character '{}' after '\\' pos={}", _c4prc(next), proc.rpos); + } + _c4dbgfdq("backslash...sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar()); +} - _c4dbgpf("rem='{}'", rem); - if(rem.begins_with(", ")) - { - _c4dbgpf("got a ',' -- it's a seq (as_child={})", start_as_child); - _start_seq(start_as_child); - add_flags(FLOW); - _append_val(_consume_scalar()); - _line_progressed(2); - } - else if(rem.begins_with(',')) - { - _c4dbgpf("got a ',' -- it's a seq (as_child={})", start_as_child); - _start_seq(start_as_child); - add_flags(FLOW); - _append_val(_consume_scalar()); - _line_progressed(1); - } - else if(rem.begins_with(": ") _RYML_WITH_TAB_TOKENS( || rem.begins_with(":\t"))) - { - _c4dbgpf("got a ': ' -- it's a map (as_child={})", start_as_child); - _start_map_unk(start_as_child); // wait for the val scalar to append the key-val pair - _line_progressed(2); - } - else if(rem == ":" || rem.begins_with(":\"") || rem.begins_with(":'")) - { - if(rem == ":") { _c4dbgpf("got a ':' -- it's a map (as_child={})", start_as_child); } - else { _c4dbgpf("got a '{}' -- it's a map 
(as_child={})", rem.first(2), start_as_child); } - _start_map_unk(start_as_child); // wait for the val scalar to append the key-val pair - _line_progressed(1); // advance only 1 - } - else if(rem.begins_with('}')) - { - if(!has_all(RMAP|FLOW)) - { - _c4err("invalid token: not reading a map"); - } - if(!has_all(SSCL)) - { - _c4err("no scalar stored"); - } - _append_key_val(saved_scalar); - _stop_map(); - _line_progressed(1); - } - else if(rem.begins_with("...")) - { - _c4dbgp("got stream end '...'"); - _end_stream(); - _line_progressed(3); - } - else if(rem.begins_with('#')) - { - _c4dbgpf("it's a comment: '{}'", rem); - _scan_comment(); - return true; - } - else if(_handle_key_anchors_and_refs()) - { - return true; - } - else if(rem.begins_with(" ") || rem.begins_with("\t")) - { - size_t n = rem.first_not_of(" \t"); - if(n == npos) - n = rem.len; - _c4dbgpf("has {} spaces/tabs, skip...", n); - _line_progressed(n); - return true; - } - else if(rem.empty()) +template +template +auto ParseEngine::_filter_dquoted(FilterProcessor &C4_RESTRICT proc) -> decltype(proc.result()) +{ + _c4dbgfdq("before=[{}]~~~{}~~~", proc.src.len, proc.src); + // from the YAML spec for double-quoted scalars: + // https://yaml.org/spec/1.2-old/spec.html#style/flow/double-quoted + while(proc.has_more_chars()) + { + const char curr = proc.curr(); + _c4dbgfdq("'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar()); + switch(curr) { - // nothing to do - } - else if(rem == "---" || rem.begins_with("--- ")) + case ' ': + case '\t': { - _c4dbgp("caught ---: starting doc"); - _start_new_doc(rem); - return true; + _c4dbgfdq("whitespace", curr); + _filter_ws_copy_trailing(proc); + break; } - else if(rem.begins_with('%')) + case '\n': { - _c4dbgp("caught a directive: ignoring..."); - _line_progressed(rem.len); - return true; + _c4dbgfdq("newline", curr); + _filter_nl_dquoted(proc); + break; } - else + case '\r': // skip \r --- https://stackoverflow.com/questions/1885900 { - _c4err("parse 
error"); + _c4dbgfdq("carriage return, ignore", curr); + proc.skip(); + break; } - - if( ! saved_scalar.empty()) + case '\\': { - _store_scalar(saved_scalar, is_quoted); + _filter_dquoted_backslash(proc); + break; } - - return true; - } - else - { - _RYML_CB_ASSERT(m_stack.m_callbacks, ! has_any(SSCL)); - csubstr scalar; - size_t indentation = m_state->line_contents.indentation; // save - bool is_quoted; - if(_scan_scalar_unk(&scalar, &is_quoted)) + default: { - _c4dbgpf("got a {} scalar", is_quoted ? "quoted" : ""); - rem = m_state->line_contents.rem; - { - size_t first = rem.first_not_of(" \t"); - if(first && first != npos) - { - _c4dbgpf("skip {} whitespace characters", first); - _line_progressed(first); - rem = rem.sub(first); - } - } - _store_scalar(scalar, is_quoted); - if(rem.begins_with(": ") _RYML_WITH_TAB_TOKENS( || rem.begins_with(":\t"))) - { - _c4dbgpf("got a ': ' next -- it's a map (as_child={})", start_as_child); - _push_level(); - _start_map(start_as_child); // wait for the val scalar to append the key-val pair - _set_indentation(indentation); - _line_progressed(2); // call this AFTER saving the indentation - } - else if(rem == ":") - { - _c4dbgpf("got a ':' next -- it's a map (as_child={})", start_as_child); - _push_level(); - _start_map(start_as_child); // wait for the val scalar to append the key-val pair - _set_indentation(indentation); - _line_progressed(1); // call this AFTER saving the indentation - } - else - { - // we still don't know whether it's a seq or a map - // so just store the scalar - } - return true; + proc.copy(); + break; } - else if(rem.begins_with_any(" \t")) - { - csubstr ws = rem.left_of(rem.first_not_of(" \t")); - rem = rem.right_of(ws); - if(has_all(RTOP) && rem.begins_with("---")) - { - _c4dbgp("there's a doc starting, and it's indented"); - _set_indentation(ws.len); - } - _c4dbgpf("skipping {} spaces/tabs", ws.len); - _line_progressed(ws.len); - return true; } } - - return false; + _c4dbgfdq("after[{}]=~~~{}~~~", 
proc.wpos, proc.sofar()); + return proc.result(); } +#undef _c4dbgfdq -//----------------------------------------------------------------------------- -C4_ALWAYS_INLINE void Parser::_skipchars(char c) + +template +FilterResult ParseEngine::filter_scalar_dquoted(csubstr scalar, substr dst) { - _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->line_contents.rem.begins_with(c)); - size_t pos = m_state->line_contents.rem.first_not_of(c); - if(pos == npos) - pos = m_state->line_contents.rem.len; // maybe the line is just whitespace - _c4dbgpf("skip {} '{}'", pos, c); - _line_progressed(pos); + FilterProcessorSrcDst proc(scalar, dst); + return _filter_dquoted(proc); } -template -C4_ALWAYS_INLINE void Parser::_skipchars(const char (&chars)[N]) +template +FilterResultExtending ParseEngine::filter_scalar_dquoted_in_place(substr dst, size_t cap) { - _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->line_contents.rem.begins_with_any(chars)); - size_t pos = m_state->line_contents.rem.first_not_of(chars); - if(pos == npos) - pos = m_state->line_contents.rem.len; // maybe the line is just whitespace - _c4dbgpf("skip {} characters", pos); - _line_progressed(pos); + FilterProcessorInplaceMidExtending proc(dst, cap); + return _filter_dquoted(proc); } //----------------------------------------------------------------------------- -bool Parser::_handle_seq_flow() +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +// block filtering helpers + +template +template +void ParseEngine::_filter_chomp(FilterProcessor &C4_RESTRICT proc, BlockChomp_e chomp, size_t indentation) { - _c4dbgpf("handle_seq_flow: node_id={} level={}", m_state->node_id, m_state->level); - csubstr rem = m_state->line_contents.rem; + _RYML_CB_ASSERT(this->callbacks(), chomp == CHOMP_CLIP || chomp == CHOMP_KEEP || chomp == CHOMP_STRIP); + _RYML_CB_ASSERT(this->callbacks(), proc.rem().first_not_of(" \n\r") == npos); - 
_RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RKEY)); - _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(RSEQ|FLOW)); + // a debugging scaffold: + #if 0 + #define _c4dbgchomp(fmt, ...) _c4dbgpf("chomp[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__) + #else + #define _c4dbgchomp(...) + #endif - if(rem.begins_with(' ')) - { - // with explicit flow, indentation does not matter - _c4dbgp("starts with spaces"); - _skipchars(' '); - return true; - } - _RYML_WITH_TAB_TOKENS(else if(rem.begins_with('\t')) - { - _c4dbgp("starts with tabs"); - _skipchars('\t'); - return true; - }) - else if(rem.begins_with('#')) - { - _c4dbgp("it's a comment"); - rem = _scan_comment(); // also progresses the line - return true; - } - else if(rem.begins_with(']')) + // advance to the last line having spaces beyond the indentation { - _c4dbgp("end the sequence"); - _pop_level(); - _line_progressed(1); - if(has_all(RSEQIMAP)) + size_t last = _find_last_newline_and_larger_indentation(proc.rem(), indentation); + if(last != npos) { - _stop_seqimap(); - _pop_level(); + _c4dbgchomp("found newline and larger indentation. last={}", last); + last = proc.rpos + last + size_t(1) + indentation; // last started at to-be-read. + _RYML_CB_ASSERT(this->callbacks(), last <= proc.src.len); + // remove indentation spaces, copy the rest + while((proc.rpos < last) && proc.has_more_chars()) + { + const char curr = proc.curr(); + _c4dbgchomp("curr='{}'", _c4prc(curr)); + switch(curr) + { + case '\n': + { + _c4dbgchomp("newline! remlen={}", proc.rem().len); + proc.copy(); + // are there spaces after the newline? + csubstr at_next_line = proc.rem(); + if(at_next_line.begins_with(' ')) + { + _c4dbgchomp("next line begins with spaces. indentation={}", indentation); + // there are spaces. 
+ size_t first_non_space = at_next_line.first_not_of(' '); + _c4dbgchomp("first_non_space={}", first_non_space); + if(first_non_space == npos) + { + _c4dbgchomp("{} spaces, to the end", at_next_line.len); + first_non_space = at_next_line.len; + } + if(first_non_space <= indentation) + { + _c4dbgchomp("skip spaces={}<=indentation={}", first_non_space, indentation); + proc.skip(first_non_space); + } + else + { + _c4dbgchomp("skip indentation={}node_id); - _start_seqimap(); - _line_progressed(1); - return true; - } - else if(rem.begins_with(": ") _RYML_WITH_TAB_TOKENS( || rem.begins_with(":\t"))) - { - _c4dbgpf("found ': ' -- there's an implicit map in the seq node[{}]", m_state->node_id); - _start_seqimap(); - _line_progressed(2); - return true; - } - else if(rem.begins_with("? ")) - { - _c4dbgpf("found '? ' -- there's an implicit map in the seq node[{}]", m_state->node_id); - _start_seqimap(); - _line_progressed(2); - _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(SSCL) && m_state->scalar == ""); - addrem_flags(QMRK|RKEY, RVAL|SSCL); - return true; - } - else if(_handle_types()) - { - return true; - } - else if(_handle_val_anchors_and_refs()) - { - return true; - } - else if(rem.begins_with(", ")) - { - _c4dbgp("found ',' -- the value was null"); - _append_val_null(rem.str - 1); - _line_progressed(2); - return true; - } - else if(rem.begins_with(',')) - { - _c4dbgp("found ',' -- the value was null"); - _append_val_null(rem.str - 1); - _line_progressed(1); - return true; - } - else if(rem.begins_with('\t')) + case CHOMP_CLIP: + { + bool had_one = false; + while(proc.has_more_chars()) { - _skipchars('\t'); - return true; + const char curr = proc.curr(); + _c4dbgchomp("CLIP: '{}'", _c4prc(curr)); + switch(curr) + { + case '\n': + { + _c4dbgchomp("copy newline!", curr); + proc.copy(); + proc.set_at_end(); + had_one = true; + break; + } + case ' ': + case '\r': + _c4dbgchomp("skip!", curr); + proc.skip(); + break; + } } - else + if(!had_one) // there were no newline 
characters. add one. { - _c4err("parse error"); + _c4dbgchomp("chomp=CLIP: add missing newline @{}", proc.wpos); + proc.set('\n'); } + break; } - else if(has_any(RNXT)) + case CHOMP_KEEP: { - _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RVAL)); - if(rem.begins_with(", ")) - { - _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(FLOW)); - _c4dbgp("seq: expect next val"); - addrem_flags(RVAL, RNXT); - _line_progressed(2); - return true; - } - else if(rem.begins_with(',')) + _c4dbgchomp("chomp=KEEP: copy all remaining new lines of {} characters", proc.rem().len); + while(proc.has_more_chars()) { - _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(FLOW)); - _c4dbgp("seq: expect next val"); - addrem_flags(RVAL, RNXT); - _line_progressed(1); - return true; + const char curr = proc.curr(); + _c4dbgchomp("KEEP: '{}'", _c4prc(curr)); + switch(curr) + { + case '\n': + _c4dbgchomp("copy newline!", curr); + proc.copy(); + break; + case ' ': + case '\r': + _c4dbgchomp("skip!", curr); + proc.skip(); + break; + } } - else if(rem == ':') + break; + } + case CHOMP_STRIP: + { + _c4dbgchomp("chomp=STRIP: strip {} characters", proc.rem().len); + // nothing to do! + break; + } + } + + #undef _c4dbgchomp +} + + +// a debugging scaffold: +#if 0 +#define _c4dbgfb(fmt, ...) _c4dbgpf("filt_block[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__) +#else +#define _c4dbgfb(...) 
+#endif + +template +template +void ParseEngine::_filter_block_indentation(FilterProcessor &C4_RESTRICT proc, size_t indentation) +{ + csubstr rem = proc.rem(); // remaining + if(rem.len) + { + size_t first = rem.first_not_of(' '); + if(first != npos) { - _c4dbgpf("found ':' -- there's an implicit map in the seq node[{}]", m_state->node_id); - _start_seqimap(); - _line_progressed(1); - return true; + _c4dbgfb("{} spaces follow before next nonws character", first); + if(first < indentation) + { + _c4dbgfb("skip {}<{} spaces from indentation", first, indentation); + proc.skip(first); + } + else + { + _c4dbgfb("skip {} spaces from indentation", indentation); + proc.skip(indentation); + } } - else if(rem.begins_with(": ") _RYML_WITH_TAB_TOKENS( || rem.begins_with(":\t"))) + #ifdef RYML_NO_COVERAGE__TO_BE_DELETED + else { - _c4dbgpf("found ': ' -- there's an implicit map in the seq node[{}]", m_state->node_id); - _start_seqimap(); - _line_progressed(2); - return true; + _c4dbgfb("all spaces to the end: {} spaces", first); + first = rem.len; + if(first) + { + if(first < indentation) + { + _c4dbgfb("skip everything", first); + proc.skip(proc.src.len - proc.rpos); + } + else + { + _c4dbgfb("skip {} spaces from indentation", indentation); + proc.skip(indentation); + } + } } - else + #endif + } +} + +template +template +size_t ParseEngine::_handle_all_whitespace(FilterProcessor &C4_RESTRICT proc, BlockChomp_e chomp) +{ + csubstr contents = proc.src.trimr(" \n\r"); + _c4dbgfb("ws: contents_len={} wslen={}", contents.len, proc.src.len-contents.len); + if(!contents.len) + { + _c4dbgfb("ws: all whitespace: len={}", proc.src.len); + if(chomp == CHOMP_KEEP && proc.src.len) { - _c4err("was expecting a comma"); + _c4dbgfb("ws: chomp=KEEP all {} newlines", proc.src.count('\n')); + while(proc.has_more_chars()) + { + const char curr = proc.curr(); + if(curr == '\n') + proc.copy(); + else + proc.skip(); + } + if(!proc.wpos) + { + proc.set('\n'); + } } } + return contents.len; +} + 
+template +template +size_t ParseEngine::_extend_to_chomp(FilterProcessor &C4_RESTRICT proc, size_t contents_len) +{ + _c4dbgfb("contents_len={}", contents_len); + + _RYML_CB_ASSERT(this->callbacks(), contents_len > 0u); + + // extend contents to just before the first newline at the end, + // in case it is preceded by spaces + size_t firstnewl = proc.src.first_of('\n', contents_len); + if(firstnewl != npos) + { + contents_len = firstnewl; + _c4dbgfb("contents_len={} <--- firstnewl={}", contents_len, firstnewl); + } else { - _c4err("internal error"); + contents_len = proc.src.len; + _c4dbgfb("contents_len={} <--- src.len={}", contents_len, proc.src.len); } - return true; + return contents_len; } +#undef _c4dbgfb + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- //----------------------------------------------------------------------------- -bool Parser::_handle_seq_blck() + +// a debugging scaffold: +#if 0 +#define _c4dbgfbl(fmt, ...) _c4dbgpf("filt_block_lit[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__) +#else +#define _c4dbgfbl(...) 
+#endif + +template +template +auto ParseEngine::_filter_block_literal(FilterProcessor &C4_RESTRICT proc, size_t indentation, BlockChomp_e chomp) -> decltype(proc.result()) { - _c4dbgpf("handle_seq_impl: node_id={} level={}", m_state->node_id, m_state->level); - csubstr rem = m_state->line_contents.rem; + _c4dbgfbl("indentation={} before=[{}]~~~{}~~~", indentation, proc.src.len, proc.src); - _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(RSEQ)); - _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RKEY)); - _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(FLOW)); + size_t contents_len = _handle_all_whitespace(proc, chomp); + if(!contents_len) + return proc.result(); - if(rem.begins_with('#')) - { - _c4dbgp("it's a comment"); - rem = _scan_comment(); - return true; - } - if(has_any(RNXT)) - { - _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RVAL)); + contents_len = _extend_to_chomp(proc, contents_len); - if(_handle_indentation()) - return true; + _c4dbgfbl("to filter=[{}]~~~{}~~~", contents_len, proc.src.first(contents_len)); - if(rem.begins_with("- ") _RYML_WITH_TAB_TOKENS( || rem.begins_with("-\t"))) - { - _c4dbgp("expect another val"); - addrem_flags(RVAL, RNXT); - _line_progressed(2); - return true; - } - else if(rem == '-') + _filter_block_indentation(proc, indentation); + + // now filter the bulk + while(proc.has_more_chars(/*maxpos*/contents_len)) + { + const char curr = proc.curr(); + _c4dbgfbl("'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar()); + switch(curr) { - _c4dbgp("expect another val"); - addrem_flags(RVAL, RNXT); - _line_progressed(1); - return true; - } - else if(rem.begins_with_any(" \t")) + case '\n': { - _RYML_CB_ASSERT(m_stack.m_callbacks, ! _at_line_begin()); - _skipchars(" \t"); - return true; + _c4dbgfbl("found newline. 
skip indentation on the next line", curr); + proc.copy(); // copy the newline + _filter_block_indentation(proc, indentation); + break; } - else if(rem.begins_with("...")) - { - _c4dbgp("got stream end '...'"); - _end_stream(); - _line_progressed(3); - return true; + case '\r': + proc.skip(); + break; + default: + proc.copy(); + break; } - else if(rem.begins_with("---")) + } + + _c4dbgfbl("before chomp: #tochomp={} sofar=[{}]~~~{}~~~", proc.rem().len, proc.sofar().len, proc.sofar()); + + _filter_chomp(proc, chomp, indentation); + + _c4dbgfbl("final=[{}]~~~{}~~~", proc.sofar().len, proc.sofar()); + + return proc.result(); +} + +#undef _c4dbgfbl + +template +FilterResult ParseEngine::filter_scalar_block_literal(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp) +{ + FilterProcessorSrcDst proc(scalar, dst); + return _filter_block_literal(proc, indentation, chomp); +} + +template +FilterResult ParseEngine::filter_scalar_block_literal_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp) +{ + FilterProcessorInplaceEndExtending proc(scalar, cap); + return _filter_block_literal(proc, indentation, chomp); +} + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +// a debugging scaffold: +#if 0 +#define _c4dbgfbf(fmt, ...) _c4dbgpf("filt_block_folded[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__) +#else +#define _c4dbgfbf(...) 
+#endif + + +template +template +void ParseEngine::_filter_block_folded_newlines_leading(FilterProcessor &C4_RESTRICT proc, size_t indentation, size_t len) +{ + _filter_block_indentation(proc, indentation); + while(proc.has_more_chars(len)) + { + const char curr = proc.curr(); + _c4dbgfbf("'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar()); + switch(curr) { - _c4dbgp("got document start '---'"); - _start_new_doc(rem); - return true; + case '\n': + _c4dbgfbf("newline.", curr); + proc.copy(); + _filter_block_indentation(proc, indentation); + break; + case '\r': + proc.skip(); + break; + case ' ': + case '\t': + { + size_t first = proc.rem().first_not_of(" \t"); + _c4dbgfbf("space. first={}", first); + if(first == npos) + first = proc.rem().len; + _c4dbgfbf("... indentation increased to {}", first); + _filter_block_folded_indented_block(proc, indentation, len, first); + break; } - else - { - _c4err("parse error"); + default: + _c4dbgfbf("newl leading: not space, not newline. stop.", 0); + return; } } - else if(has_any(RVAL)) - { - // there can be empty values - if(_handle_indentation()) - return true; - - csubstr s; - bool is_quoted; - if(_scan_scalar_seq_blck(&s, &is_quoted)) // this also progresses the line - { - _c4dbgpf("it's a{} scalar", is_quoted ? 
" quoted" : ""); - - rem = m_state->line_contents.rem; - if(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(rem.begins_with_any(" \t"), rem.begins_with(' '))) - { - _c4dbgp("skipping whitespace..."); - size_t skip = rem.first_not_of(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' ')); - if(skip == csubstr::npos) - skip = rem.len; // maybe the line is just whitespace - _line_progressed(skip); - rem = rem.sub(skip); - } +} - _c4dbgpf("rem=[{}]~~~{}~~~", rem.len, rem); - if(!rem.begins_with('#') && (rem.ends_with(':') || rem.begins_with(": ") _RYML_WITH_TAB_TOKENS( || rem.begins_with(":\t")))) +template +template +size_t ParseEngine::_filter_block_folded_newlines_compress(FilterProcessor &C4_RESTRICT proc, size_t num_newl, size_t wpos_at_first_newl) +{ + switch(num_newl) + { + case 1u: + _c4dbgfbf("... this is the first newline. turn into space. wpos={}", proc.wpos); + wpos_at_first_newl = proc.wpos; + proc.skip(); + proc.set(' '); + break; + case 2u: + _c4dbgfbf("... this is the second newline. prev space (at wpos={}) must be newline", wpos_at_first_newl); + _RYML_CB_ASSERT(this->callbacks(), wpos_at_first_newl != npos); + _RYML_CB_ASSERT(this->callbacks(), proc.sofar()[wpos_at_first_newl] == ' '); + _RYML_CB_ASSERT(this->callbacks(), wpos_at_first_newl + 1u == proc.wpos); + proc.skip(); + proc.set_at(wpos_at_first_newl, '\n'); + _RYML_CB_ASSERT(this->callbacks(), proc.sofar()[wpos_at_first_newl] == '\n'); + break; + default: + _c4dbgfbf("... subsequent newline (num_newl={}). 
copy", num_newl); + proc.copy(); + break; + } + return wpos_at_first_newl; +} + +template +template +void ParseEngine::_filter_block_folded_newlines(FilterProcessor &C4_RESTRICT proc, size_t indentation, size_t len) +{ + _RYML_CB_ASSERT(this->callbacks(), proc.curr() == '\n'); + size_t num_newl = 0; + size_t wpos_at_first_newl = npos; + while(proc.has_more_chars(len)) + { + const char curr = proc.curr(); + _c4dbgfbf("'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar()); + switch(curr) + { + case '\n': + { + _c4dbgfbf("newline. sofar={}", num_newl); + // NOTE: vs2022-32bit-release builds were giving wrong + // results in this block, if it was written as either + // as a switch(num_newl) or its equivalent if-form. + // + // For this reason, we're using a dedicated function + // (**_compress), which seems to work around the issue. + // + // The manifested problem was that somewhere between the + // assignment to curr and this point, proc.wpos (the + // write-position of the processor) jumped to npos, which + // made the write wrap-around! To make things worse, + // enabling prints via _c4dbgpf() and _c4dbgfbf() made the + // problem go away! + // + // The only way to make the problem appear with prints + // enabled was by disabling all prints in this function + // (including in the block which was moved to the compress + // function) and then selectively enabling only some of + // those prints. + // + // This may be due to some bug in the cl-x86 optimizer; or + // it may be triggered by some UB which may be + // inadvertedly present in this function or in the filter + // processor. This is despite our best efforts to weed out + // any such UB problem: neither clang-tidy nor none of the + // sanitizers, or gcc's -fanalyzer pointed to any problems + // in this code. + // + // In the end, moving this block to a separate function + // was the only way to bury the problem. 
But it may + // resurface again, as The Undead, rising to from the + // grave to haunt us with his terrible presence. + // + // We may have to revisit this. With a stake, and lots of + // garlic. + wpos_at_first_newl = _filter_block_folded_newlines_compress(proc, ++num_newl, wpos_at_first_newl); + _filter_block_indentation(proc, indentation); + break; + } + case ' ': + case '\t': { - _c4dbgp("actually, the scalar is the first key of a map, and it opens a new scope"); - if(m_key_anchor.empty()) - _move_val_anchor_to_key_anchor(); - if(m_key_tag.empty()) - _move_val_tag_to_key_tag(); - addrem_flags(RNXT, RVAL); // before _push_level! This prepares the current level for popping by setting it to RNXT - _push_level(); - _start_map(); - _store_scalar(s, is_quoted); - if( ! _maybe_set_indentation_from_anchor_or_tag()) + size_t first = proc.rem().first_not_of(" \t"); + _c4dbgfbf("space. first={}", first); + if(first == npos) + first = proc.rem().len; + _c4dbgfbf("... indentation increased to {}", first); + if(num_newl) { - _c4dbgpf("set indentation from scalar: {}", m_state->scalar_col); - _set_indentation(m_state->scalar_col); // this is the column where the scalar starts + _c4dbgfbf("... prev space (at wpos={}) must be newline", wpos_at_first_newl); + proc.set_at(wpos_at_first_newl, '\n'); } - _move_key_tag2_to_key_tag(); - addrem_flags(RVAL, RKEY); - _line_progressed(1); - } - else - { - _c4dbgp("appending val to current seq"); - _append_val(s, is_quoted); - addrem_flags(RNXT, RVAL); + if(num_newl > 1u) + { + _c4dbgfbf("... 
add missing newline", wpos_at_first_newl); + proc.set('\n'); + } + _filter_block_folded_indented_block(proc, indentation, len, first); + num_newl = 0; + wpos_at_first_newl = npos; + break; } - return true; - } - else if(rem.begins_with("- ") _RYML_WITH_TAB_TOKENS( || rem.begins_with("-\t"))) - { - if(_rval_dash_start_or_continue_seq()) - _line_progressed(2); - return true; - } - else if(rem == '-') - { - if(_rval_dash_start_or_continue_seq()) - _line_progressed(1); - return true; - } - else if(rem.begins_with('[')) - { - _c4dbgp("val is a child seq, flow"); - addrem_flags(RNXT, RVAL); // before _push_level! - _push_level(/*explicit flow*/true); - _start_seq(); - add_flags(FLOW); - _line_progressed(1); - return true; - } - else if(rem.begins_with('{')) - { - _c4dbgp("val is a child map, flow"); - addrem_flags(RNXT, RVAL); // before _push_level! - _push_level(/*explicit flow*/true); - _start_map(); - addrem_flags(FLOW|RKEY, RVAL); - _line_progressed(1); - return true; - } - else if(rem.begins_with("? ")) - { - _c4dbgp("val is a child map + this key is complex"); - addrem_flags(RNXT, RVAL); // before _push_level! - _push_level(); - _start_map(); - addrem_flags(QMRK|RKEY, RVAL); - _save_indentation(); - _line_progressed(2); - return true; + case '\r': + proc.skip(); + break; + default: + _c4dbgfbf("not space, not newline. 
stop.", 0); + return; } - else if(rem.begins_with(' ')) + } +} + + +template +template +void ParseEngine::_filter_block_folded_indented_block(FilterProcessor &C4_RESTRICT proc, size_t indentation, size_t len, size_t curr_indentation) noexcept +{ + _RYML_CB_ASSERT(this->callbacks(), (proc.rem().first_not_of(" \t") == curr_indentation) || (proc.rem().first_not_of(" \t") == npos)); + if(curr_indentation) + proc.copy(curr_indentation); + while(proc.has_more_chars(len)) + { + const char curr = proc.curr(); + _c4dbgfbf("'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar()); + switch(curr) { - csubstr spc = rem.left_of(rem.first_not_of(' ')); - if(_at_line_begin()) - { - _c4dbgpf("skipping value indentation: {} spaces", spc.len); - _line_progressed(spc.len); - return true; - } - else + case '\n': { - _c4dbgpf("skipping {} spaces", spc.len); - _line_progressed(spc.len); - return true; + proc.copy(); + _filter_block_indentation(proc, indentation); + csubstr rem = proc.rem(); + const size_t first = rem.first_not_of(' '); + _c4dbgfbf("newline. 
firstns={}", first); + if(first == 0) + { + const char c = rem[first]; + _c4dbgfbf("firstns={}='{}'", first, _c4prc(c)); + if(c == '\n' || c == '\r') + { + ; + } + else + { + _c4dbgfbf("done with indented block", first); + goto endloop; + } + } + else if(first != npos) + { + proc.copy(first); + _c4dbgfbf("copy all {} spaces", first); + } + break; } + break; + case '\r': + proc.skip(); + break; + default: + proc.copy(); + break; } - else if(_handle_types()) + } + endloop: + return; +} + + +template +template +auto ParseEngine::_filter_block_folded(FilterProcessor &C4_RESTRICT proc, size_t indentation, BlockChomp_e chomp) -> decltype(proc.result()) +{ + _c4dbgfbf("indentation={} before=[{}]~~~{}~~~", indentation, proc.src.len, proc.src); + + size_t contents_len = _handle_all_whitespace(proc, chomp); + if(!contents_len) + return proc.result(); + + contents_len = _extend_to_chomp(proc, contents_len); + + _c4dbgfbf("to filter=[{}]~~~{}~~~", contents_len, proc.src.first(contents_len)); + + _filter_block_folded_newlines_leading(proc, indentation, contents_len); + + // now filter the bulk + while(proc.has_more_chars(/*maxpos*/contents_len)) + { + const char curr = proc.curr(); + _c4dbgfbf("'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar()); + switch(curr) { - return true; - } - else if(_handle_val_anchors_and_refs()) + case '\n': { - return true; - } - /* pathological case: - * - &key : val - * - &key : - * - : val - */ - else if((!has_all(SSCL)) && - (rem.begins_with(": ") || rem.left_of(rem.find("#")).trimr("\t") == ":")) - { - if(!m_val_anchor.empty() || !m_val_tag.empty()) - { - _c4dbgp("val is a child map + this key is empty, with anchors or tags"); - addrem_flags(RNXT, RVAL); // before _push_level! 
- _move_val_tag_to_key_tag(); - _move_val_anchor_to_key_anchor(); - _push_level(); - _start_map(); - _store_scalar_null(rem.str); - addrem_flags(RVAL, RKEY); - RYML_CHECK(_maybe_set_indentation_from_anchor_or_tag()); // one of them must exist - _line_progressed(rem.begins_with(": ") ? 2u : 1u); - return true; - } - else - { - _c4dbgp("val is a child map + this key is empty, no anchors or tags"); - addrem_flags(RNXT, RVAL); // before _push_level! - size_t ind = m_state->indref; - _push_level(); - _start_map(); - _store_scalar_null(rem.str); - addrem_flags(RVAL, RKEY); - _c4dbgpf("set indentation from map anchor: {}", ind + 2); - _set_indentation(ind + 2); // this is the column where the map starts - _line_progressed(rem.begins_with(": ") ? 2u : 1u); - return true; - } + _c4dbgfbf("found newline", curr); + _filter_block_folded_newlines(proc, indentation, contents_len); + break; } - else - { - _c4err("parse error"); + case '\r': + proc.skip(); + break; + default: + proc.copy(); + break; } } - return false; + _c4dbgfbf("before chomp: #tochomp={} sofar=[{}]~~~{}~~~", proc.rem().len, proc.sofar().len, proc.sofar()); + + _filter_chomp(proc, chomp, indentation); + + _c4dbgfbf("final=[{}]~~~{}~~~", proc.sofar().len, proc.sofar()); + + return proc.result(); +} + +#undef _c4dbgfbf + +template +FilterResult ParseEngine::filter_scalar_block_folded(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp) +{ + FilterProcessorSrcDst proc(scalar, dst); + return _filter_block_folded(proc, indentation, chomp); +} + +template +FilterResult ParseEngine::filter_scalar_block_folded_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp) +{ + FilterProcessorInplaceEndExtending proc(scalar, cap); + return _filter_block_folded(proc, indentation, chomp); } + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- 
//----------------------------------------------------------------------------- -bool Parser::_rval_dash_start_or_continue_seq() +template +csubstr ParseEngine::_filter_scalar_plain(substr s, size_t indentation) { - size_t ind = m_state->line_contents.current_col(); - _RYML_CB_ASSERT(m_stack.m_callbacks, ind >= m_state->indref); - size_t delta_ind = ind - m_state->indref; - if( ! delta_ind) - { - _c4dbgp("prev val was empty"); - addrem_flags(RNXT, RVAL); - _append_val_null(&m_state->line_contents.full[ind]); - return false; - } - _c4dbgp("val is a nested seq, indented"); - addrem_flags(RNXT, RVAL); // before _push_level! - _push_level(); - _start_seq(); - _save_indentation(); - return true; + _c4dbgpf("filtering plain scalar: s=[{}]~~~{}~~~", s.len, s); + FilterResult r = this->filter_scalar_plain_in_place(s, s.len, indentation); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, r.valid()); + _c4dbgpf("filtering plain scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get()); + return r.get(); } //----------------------------------------------------------------------------- -bool Parser::_handle_map_flow() + +template +csubstr ParseEngine::_filter_scalar_squot(substr s) { - // explicit flow, ie, inside {}, separated by commas - _c4dbgpf("handle_map_flow: node_id={} level={}", m_state->node_id, m_state->level); - csubstr rem = m_state->line_contents.rem; + _c4dbgpf("filtering squo scalar: s=[{}]~~~{}~~~", s.len, s); + FilterResult r = this->filter_scalar_squoted_in_place(s, s.len); + _RYML_CB_ASSERT(this->callbacks(), r.valid()); + _c4dbgpf("filtering squo scalar: success! 
s=[{}]~~~{}~~~", r.get().len, r.get()); + return r.get(); +} - _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(RMAP|FLOW)); - if(rem.begins_with(' ')) +//----------------------------------------------------------------------------- + +template +csubstr ParseEngine::_filter_scalar_dquot(substr s) +{ + _c4dbgpf("filtering dquo scalar: s=[{}]~~~{}~~~", s.len, s); + FilterResultExtending r = this->filter_scalar_dquoted_in_place(s, s.len); + if(C4_LIKELY(r.valid())) { - // with explicit flow, indentation does not matter - _c4dbgp("starts with spaces"); - _skipchars(' '); - return true; + _c4dbgpf("filtering dquo scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get()); + return r.get(); } - _RYML_WITH_TAB_TOKENS(else if(rem.begins_with('\t')) + else { - // with explicit flow, indentation does not matter - _c4dbgp("starts with tabs"); - _skipchars('\t'); - return true; - }) - else if(rem.begins_with('#')) + const size_t len = r.required_len(); + _c4dbgpf("filtering dquo scalar: not enough space: needs {}, have {}", len, s.len); + substr dst = m_evt_handler->alloc_arena(len, &s); + _c4dbgpf("filtering dquo scalar: dst.len={}", dst.len); + _RYML_CB_ASSERT(this->callbacks(), dst.len == len); + FilterResult rsd = this->filter_scalar_dquoted(s, dst); + _c4dbgpf("filtering dquo scalar: ... result now needs {} was {}", rsd.required_len(), len); + _RYML_CB_ASSERT(this->callbacks(), rsd.required_len() <= len); // may be smaller! + _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, rsd.valid()); + _c4dbgpf("filtering dquo scalar: success! 
s=[{}]~~~{}~~~", rsd.get().len, rsd.get()); + return rsd.get(); + } +} + + +//----------------------------------------------------------------------------- +template +csubstr ParseEngine::_filter_scalar_literal(substr s, size_t indentation, BlockChomp_e chomp) +{ + _c4dbgpf("filtering block literal scalar: s=[{}]~~~{}~~~", s.len, s); + FilterResult r = this->filter_scalar_block_literal_in_place(s, s.len, indentation, chomp); + if(C4_LIKELY(r.valid())) { - _c4dbgp("it's a comment"); - rem = _scan_comment(); // also progresses the line - return true; + _c4dbgpf("filtering block literal scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get()); + return r.get(); } - else if(rem.begins_with('}')) + else { - _c4dbgp("end the map"); - if(has_all(SSCL)) - { - _c4dbgp("the last val was null"); - _append_key_val_null(rem.str - 1); - rem_flags(RVAL); - } - _pop_level(); - _line_progressed(1); - if(has_all(RSEQIMAP)) - { - _c4dbgp("stopping implicitly nested 1x map"); - _stop_seqimap(); - _pop_level(); - } - return true; + _c4dbgpf("filtering block literal scalar: not enough space: needs {}, have {}", r.required_len(), s.len); + substr dst = m_evt_handler->alloc_arena(r.required_len(), &s); + FilterResult rsd = this->filter_scalar_block_literal(s, dst, indentation, chomp); + _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, rsd.valid()); + _c4dbgpf("filtering block literal scalar: success! s=[{}]~~~{}~~~", rsd.get().len, rsd.get()); + return rsd.get(); } +} - if(has_any(RNXT)) + +//----------------------------------------------------------------------------- +template +csubstr ParseEngine::_filter_scalar_folded(substr s, size_t indentation, BlockChomp_e chomp) +{ + _c4dbgpf("filtering block folded scalar: s=[{}]~~~{}~~~", s.len, s); + FilterResult r = this->filter_scalar_block_folded_in_place(s, s.len, indentation, chomp); + if(C4_LIKELY(r.valid())) + { + _c4dbgpf("filtering block folded scalar: success! 
s=[{}]~~~{}~~~", r.get().len, r.get()); + return r.get(); + } + else { - _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RKEY)); - _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RVAL)); - _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RSEQIMAP)); + _c4dbgpf("filtering block folded scalar: not enough space: needs {}, have {}", r.required_len(), s.len); + substr dst = m_evt_handler->alloc_arena(r.required_len(), &s); + FilterResult rsd = this->filter_scalar_block_folded(s, dst, indentation, chomp); + _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, rsd.valid()); + _c4dbgpf("filtering block folded scalar: success! s=[{}]~~~{}~~~", rsd.get().len, rsd.get()); + return rsd.get(); + } +} - if(rem.begins_with(", ")) - { - _c4dbgp("seq: expect next keyval"); - addrem_flags(RKEY, RNXT); - _line_progressed(2); - return true; - } - else if(rem.begins_with(',')) - { - _c4dbgp("seq: expect next keyval"); - addrem_flags(RKEY, RNXT); - _line_progressed(1); - return true; + +//----------------------------------------------------------------------------- + +template +csubstr ParseEngine::_maybe_filter_key_scalar_plain(ScannedScalar const& C4_RESTRICT sc, size_t indentation) +{ + if(sc.needs_filter) + { + if(m_options.scalar_filtering()) + { + return _filter_scalar_plain(sc.scalar, indentation); } else { - _c4err("parse error"); + _c4dbgp("plain scalar left unfiltered"); + m_evt_handler->mark_key_scalar_unfiltered(); } } - else if(has_any(RKEY)) + else { - _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RNXT)); - _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RVAL)); - - bool is_quoted; - if(has_none(SSCL) && _scan_scalar_map_flow(&rem, &is_quoted)) - { - _c4dbgp("it's a scalar"); - _store_scalar(rem, is_quoted); - rem = m_state->line_contents.rem; - csubstr trimmed = rem.triml(" \t"); - if(trimmed.len && (trimmed.begins_with(": ") || trimmed.begins_with_any(":,}") _RYML_WITH_TAB_TOKENS( || rem.begins_with(":\t")))) - { - _RYML_CB_ASSERT(m_stack.m_callbacks, trimmed.str >= rem.str); - 
size_t num = static_cast(trimmed.str - rem.str); - _c4dbgpf("trimming {} whitespace after the scalar: '{}' --> '{}'", num, rem, rem.sub(num)); - rem = rem.sub(num); - _line_progressed(num); - } - } + _c4dbgp("plain scalar doesn't need filtering"); + } + return sc.scalar; +} - if(rem.begins_with(": ") _RYML_WITH_TAB_TOKENS( || rem.begins_with(":\t"))) - { - _c4dbgp("wait for val"); - addrem_flags(RVAL, RKEY|QMRK); - _line_progressed(2); - if(!has_all(SSCL)) - { - _c4dbgp("no key was found, defaulting to empty key ''"); - _store_scalar_null(rem.str); - } - return true; - } - else if(rem == ':') - { - _c4dbgp("wait for val"); - addrem_flags(RVAL, RKEY|QMRK); - _line_progressed(1); - if(!has_all(SSCL)) - { - _c4dbgp("no key was found, defaulting to empty key ''"); - _store_scalar_null(rem.str); - } - return true; - } - else if(rem.begins_with('?')) - { - _c4dbgp("complex key"); - add_flags(QMRK); - _line_progressed(1); - return true; - } - else if(rem.begins_with(',')) - { - _c4dbgp("prev scalar was a key with null value"); - _append_key_val_null(rem.str - 1); - _line_progressed(1); - return true; - } - else if(rem.begins_with('}')) - { - _c4dbgp("map terminates after a key..."); - _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(SSCL)); - _c4dbgp("the last val was null"); - _append_key_val_null(rem.str - 1); - rem_flags(RVAL); - if(has_all(RSEQIMAP)) - { - _c4dbgp("stopping implicitly nested 1x map"); - _stop_seqimap(); - _pop_level(); - } - _pop_level(); - _line_progressed(1); - return true; - } - else if(_handle_types()) - { - return true; - } - else if(_handle_key_anchors_and_refs()) - { - return true; - } - else if(rem == "") +template +csubstr ParseEngine::_maybe_filter_val_scalar_plain(ScannedScalar const& C4_RESTRICT sc, size_t indentation) +{ + if(sc.needs_filter) + { + if(m_options.scalar_filtering()) { - return true; + return _filter_scalar_plain(sc.scalar, indentation); } else { - size_t pos = rem.first_not_of(" \t"); - if(pos == csubstr::npos) - pos = 0; - rem 
= rem.sub(pos); - if(rem.begins_with(':')) - { - _c4dbgp("wait for val"); - addrem_flags(RVAL, RKEY|QMRK); - _line_progressed(pos + 1); - if(!has_all(SSCL)) - { - _c4dbgp("no key was found, defaulting to empty key ''"); - _store_scalar_null(rem.str); - } - return true; - } - else if(rem.begins_with('#')) - { - _c4dbgp("it's a comment"); - _line_progressed(pos); - rem = _scan_comment(); // also progresses the line - return true; - } - else - { - _c4err("parse error"); - } + _c4dbgp("plain scalar left unfiltered"); + m_evt_handler->mark_val_scalar_unfiltered(); } } - else if(has_any(RVAL)) + else + { + _c4dbgp("plain scalar doesn't need filtering"); + } + return sc.scalar; +} + + +//----------------------------------------------------------------------------- + +template +csubstr ParseEngine::_maybe_filter_key_scalar_squot(ScannedScalar const& C4_RESTRICT sc) +{ + if(sc.needs_filter) { - _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RNXT)); - _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RKEY)); - _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(SSCL)); - bool is_quoted; - if(_scan_scalar_map_flow(&rem, &is_quoted)) + if(m_options.scalar_filtering()) { - _c4dbgp("it's a scalar"); - addrem_flags(RNXT, RVAL|RKEY); - _append_key_val(rem, is_quoted); - if(has_all(RSEQIMAP)) - { - _c4dbgp("stopping implicitly nested 1x map"); - _stop_seqimap(); - _pop_level(); - } - return true; + return _filter_scalar_squot(sc.scalar); } - else if(rem.begins_with('[')) + else { - _c4dbgp("val is a child seq"); - addrem_flags(RNXT, RVAL|RKEY); // before _push_level! 
- _push_level(/*explicit flow*/true); - _move_scalar_from_top(); - _start_seq(); - add_flags(FLOW); - _line_progressed(1); - return true; + _c4dbgp("squo key scalar left unfiltered"); + m_evt_handler->mark_key_scalar_unfiltered(); } - else if(rem.begins_with('{')) + } + else + { + _c4dbgp("squo key scalar doesn't need filtering"); + } + return sc.scalar; +} + +template +csubstr ParseEngine::_maybe_filter_val_scalar_squot(ScannedScalar const& C4_RESTRICT sc) +{ + if(sc.needs_filter) + { + if(m_options.scalar_filtering()) { - _c4dbgp("val is a child map"); - addrem_flags(RNXT, RVAL|RKEY); // before _push_level! - _push_level(/*explicit flow*/true); - _move_scalar_from_top(); - _start_map(); - addrem_flags(FLOW|RKEY, RNXT|RVAL); - _line_progressed(1); - return true; + return _filter_scalar_squot(sc.scalar); } - else if(_handle_types()) + else { - return true; + _c4dbgp("squo val scalar left unfiltered"); + m_evt_handler->mark_val_scalar_unfiltered(); } - else if(_handle_val_anchors_and_refs()) + } + else + { + _c4dbgp("squo val scalar doesn't need filtering"); + } + return sc.scalar; +} + + +//----------------------------------------------------------------------------- + +template +csubstr ParseEngine::_maybe_filter_key_scalar_dquot(ScannedScalar const& C4_RESTRICT sc) +{ + if(sc.needs_filter) + { + if(m_options.scalar_filtering()) { - return true; + return _filter_scalar_dquot(sc.scalar); } - else if(rem.begins_with(',')) + else { - _c4dbgp("appending empty val"); - _append_key_val_null(rem.str - 1); - addrem_flags(RKEY, RVAL); - _line_progressed(1); - if(has_any(RSEQIMAP)) - { - _c4dbgp("stopping implicitly nested 1x map"); - _stop_seqimap(); - _pop_level(); - } - return true; + _c4dbgp("dquo scalar left unfiltered"); + m_evt_handler->mark_key_scalar_unfiltered(); } - else if(has_any(RSEQIMAP) && rem.begins_with(']')) + } + else + { + _c4dbgp("dquo scalar doesn't need filtering"); + } + return sc.scalar; +} + +template +csubstr 
ParseEngine::_maybe_filter_val_scalar_dquot(ScannedScalar const& C4_RESTRICT sc) +{ + if(sc.needs_filter) + { + if(m_options.scalar_filtering()) { - _c4dbgp("stopping implicitly nested 1x map"); - if(has_any(SSCL)) - { - _append_key_val_null(rem.str - 1); - } - _stop_seqimap(); - _pop_level(); - return true; + return _filter_scalar_dquot(sc.scalar); } else { - _c4err("parse error"); + _c4dbgp("dquo scalar left unfiltered"); + m_evt_handler->mark_val_scalar_unfiltered(); } } else { - _c4err("internal error"); + _c4dbgp("dquo scalar doesn't need filtering"); } - - return false; + return sc.scalar; } -//----------------------------------------------------------------------------- -bool Parser::_handle_map_blck() -{ - _c4dbgpf("handle_map_blck: node_id={} level={}", m_state->node_id, m_state->level); - csubstr rem = m_state->line_contents.rem; - _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(RMAP)); - _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(FLOW)); +//----------------------------------------------------------------------------- - if(rem.begins_with('#')) +template +csubstr ParseEngine::_maybe_filter_key_scalar_literal(ScannedBlock const& C4_RESTRICT sb) +{ + if(m_options.scalar_filtering()) { - _c4dbgp("it's a comment"); - rem = _scan_comment(); - return true; + return _filter_scalar_literal(sb.scalar, sb.indentation, sb.chomp); + } + else + { + _c4dbgp("literal scalar left unfiltered"); + m_evt_handler->mark_key_scalar_unfiltered(); } + return sb.scalar; +} - if(has_any(RNXT)) +template +csubstr ParseEngine::_maybe_filter_val_scalar_literal(ScannedBlock const& C4_RESTRICT sb) +{ + if(m_options.scalar_filtering()) + { + return _filter_scalar_literal(sb.scalar, sb.indentation, sb.chomp); + } + else { - _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RKEY)); - _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RVAL)); - // actually, we don't need RNXT in indent-based maps. 
- addrem_flags(RKEY, RNXT); + _c4dbgp("literal scalar left unfiltered"); + m_evt_handler->mark_val_scalar_unfiltered(); } + return sb.scalar; +} + - if(_handle_indentation()) +//----------------------------------------------------------------------------- + +template +csubstr ParseEngine::_maybe_filter_key_scalar_folded(ScannedBlock const& C4_RESTRICT sb) +{ + if(m_options.scalar_filtering()) { - _c4dbgp("indentation token"); - return true; + return _filter_scalar_folded(sb.scalar, sb.indentation, sb.chomp); } + else + { + _c4dbgp("folded scalar left unfiltered"); + m_evt_handler->mark_key_scalar_unfiltered(); + } + return sb.scalar; +} - if(has_any(RKEY)) +template +csubstr ParseEngine::_maybe_filter_val_scalar_folded(ScannedBlock const& C4_RESTRICT sb) +{ + if(m_options.scalar_filtering()) + { + return _filter_scalar_folded(sb.scalar, sb.indentation, sb.chomp); + } + else { - _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RNXT)); - _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RVAL)); + _c4dbgp("folded scalar left unfiltered"); + m_evt_handler->mark_val_scalar_unfiltered(); + } + return sb.scalar; +} - _c4dbgp("RMAP|RKEY read scalar?"); - bool is_quoted; - if(_scan_scalar_map_blck(&rem, &is_quoted)) // this also progresses the line - { - _c4dbgpf("it's a{} scalar", is_quoted ? " quoted" : ""); - if(has_all(QMRK|SSCL)) - { - _c4dbgpf("current key is QMRK; SSCL is set. so take store scalar='{}' as key and add an empty val", m_state->scalar); - _append_key_val_null(rem.str - 1); - } - _store_scalar(rem, is_quoted); - if(has_all(QMRK|RSET)) - { - _c4dbgp("it's a complex key, so use null value '~'"); - _append_key_val_null(rem.str); - } - rem = m_state->line_contents.rem; - if(rem.begins_with(':')) - { - _c4dbgp("wait for val"); - addrem_flags(RVAL, RKEY|QMRK); - _line_progressed(1); - rem = m_state->line_contents.rem; - if(rem.begins_with_any(" \t")) - { - _RYML_CB_ASSERT(m_stack.m_callbacks, ! 
_at_line_begin()); - rem = rem.left_of(rem.first_not_of(" \t")); - _c4dbgpf("skip {} spaces/tabs", rem.len); - _line_progressed(rem.len); - } - } - return true; - } - else if(rem.begins_with_any(" \t")) - { - size_t pos = rem.first_not_of(" \t"); - if(pos == npos) - pos = rem.len; - _c4dbgpf("skip {} spaces/tabs", pos); - _line_progressed(pos); - return true; - } - else if(rem == '?' || rem.begins_with("? ")) - { - _c4dbgp("it's a complex key"); - _line_progressed(rem.begins_with("? ") ? 2u : 1u); - if(has_any(SSCL)) - _append_key_val_null(rem.str - 1); - add_flags(QMRK); - return true; - } - else if(has_all(QMRK) && rem.begins_with(':')) - { - _c4dbgp("complex key finished"); - if(!has_any(SSCL)) - _store_scalar_null(rem.str); - addrem_flags(RVAL, RKEY|QMRK); - _line_progressed(1); - rem = m_state->line_contents.rem; - if(rem.begins_with(' ')) - { - _RYML_CB_ASSERT(m_stack.m_callbacks, ! _at_line_begin()); - _skipchars(' '); - } - return true; - } - else if(rem == ':' || rem.begins_with(": ") _RYML_WITH_TAB_TOKENS( || rem.begins_with(":\t"))) - { - _c4dbgp("key finished"); - if(!has_all(SSCL)) - { - _c4dbgp("key was empty..."); - _store_scalar_null(rem.str); - rem_flags(QMRK); - } - addrem_flags(RVAL, RKEY); - _line_progressed(rem == ':' ? 1 : 2); - return true; - } - else if(rem.begins_with("...")) - { - _c4dbgp("end current document"); - _end_stream(); - _line_progressed(3); - return true; - } - else if(rem.begins_with("---")) - { - _c4dbgp("start new document '---'"); - _start_new_doc(rem); - return true; - } - else if(_handle_types()) +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +#ifdef RYML_DBG // !!! 
<---------------------------------- + +template +void ParseEngine::add_flags(ParserFlag_t on, ParserState * s) +{ + char buf1_[64], buf2_[64], buf3_[64]; + csubstr buf1 = detail::_parser_flags_to_str(buf1_, on); + csubstr buf2 = detail::_parser_flags_to_str(buf2_, s->flags); + csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags|on); + _c4dbgpf("state[{}]: add {}: before={} after={}", s->level, buf1, buf2, buf3); + s->flags |= on; +} + +template +void ParseEngine::addrem_flags(ParserFlag_t on, ParserFlag_t off, ParserState * s) +{ + char buf1_[64], buf2_[64], buf3_[64], buf4_[64]; + csubstr buf1 = detail::_parser_flags_to_str(buf1_, on); + csubstr buf2 = detail::_parser_flags_to_str(buf2_, off); + csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags); + csubstr buf4 = detail::_parser_flags_to_str(buf4_, ((s->flags|on)&(~off))); + _c4dbgpf("state[{}]: add {} / rem {}: before={} after={}", s->level, buf1, buf2, buf3, buf4); + s->flags |= on; + s->flags &= ~off; +} + +template +void ParseEngine::rem_flags(ParserFlag_t off, ParserState * s) +{ + char buf1_[64], buf2_[64], buf3_[64]; + csubstr buf1 = detail::_parser_flags_to_str(buf1_, off); + csubstr buf2 = detail::_parser_flags_to_str(buf2_, s->flags); + csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags&(~off)); + _c4dbgpf("state[{}]: rem {}: before={} after={}", s->level, buf1, buf2, buf3); + s->flags &= ~off; +} + +inline C4_NO_INLINE csubstr detail::_parser_flags_to_str(substr buf, ParserFlag_t flags) +{ + size_t pos = 0; + bool gotone = false; + + #define _prflag(fl) \ + if((flags & fl) == (fl)) \ + { \ + if(gotone) \ + { \ + if(pos + 1 < buf.len) \ + buf[pos] = '|'; \ + ++pos; \ + } \ + csubstr fltxt = #fl; \ + if(pos + fltxt.len <= buf.len) \ + memcpy(buf.str + pos, fltxt.str, fltxt.len); \ + pos += fltxt.len; \ + gotone = true; \ + } + + _prflag(RTOP); + _prflag(RUNK); + _prflag(RMAP); + _prflag(RSEQ); + _prflag(FLOW); + _prflag(BLCK); + _prflag(QMRK); + _prflag(RKEY); + _prflag(RVAL); + 
_prflag(RKCL); + _prflag(RNXT); + _prflag(SSCL); + _prflag(QSCL); + _prflag(RSET); + _prflag(RDOC); + _prflag(NDOC); + _prflag(USTY); + _prflag(RSEQIMAP); + + #undef _prflag + + if(pos == 0) + if(buf.len > 0) + buf[pos++] = '0'; + + RYML_CHECK(pos <= buf.len); + + return buf.first(pos); +} + +#endif // RYML_DBG !!! <---------------------------------- + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +template +csubstr ParseEngine::location_contents(Location const& loc) const +{ + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, loc.offset < m_buf.len); + return m_buf.sub(loc.offset); +} + +template +Location ParseEngine::location(ConstNodeRef node) const +{ + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, node.readable()); + return location(*node.tree(), node.id()); +} + +template +Location ParseEngine::location(Tree const& tree, id_type node) const +{ + // try hard to avoid getting the location from a null string. 
+ Location loc; + if(_location_from_node(tree, node, &loc, 0)) + return loc; + return val_location(m_buf.str); +} + +template +bool ParseEngine::_location_from_node(Tree const& tree, id_type node, Location *C4_RESTRICT loc, id_type level) const +{ + if(tree.has_key(node)) + { + csubstr k = tree.key(node); + if(C4_LIKELY(k.str != nullptr)) { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, k.is_sub(m_buf)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.is_super(k)); + *loc = val_location(k.str); return true; } - else if(_handle_key_anchors_and_refs()) + } + + if(tree.has_val(node)) + { + csubstr v = tree.val(node); + if(C4_LIKELY(v.str != nullptr)) { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, v.is_sub(m_buf)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.is_super(v)); + *loc = val_location(v.str); return true; } - else - { - _c4err("parse error"); - } } - else if(has_any(RVAL)) + + if(tree.is_container(node)) { - _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RNXT)); - _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RKEY)); + if(_location_from_cont(tree, node, loc)) + return true; + } - _c4dbgp("RMAP|RVAL read scalar?"); - csubstr s; - bool is_quoted; - if(_scan_scalar_map_blck(&s, &is_quoted)) // this also progresses the line + if(tree.type(node) != NOTYPE && level == 0) + { + // try the prev sibling { - _c4dbgpf("it's a{} scalar", is_quoted ? " quoted" : ""); - - rem = m_state->line_contents.rem; - - if(rem.begins_with(": ")) - { - _c4dbgp("actually, the scalar is the first key of a map"); - addrem_flags(RKEY, RVAL); // before _push_level! 
This prepares the current level for popping by setting it to RNXT - _push_level(); - _move_scalar_from_top(); - _move_val_anchor_to_key_anchor(); - _start_map(); - _save_indentation(m_state->scalar_col); - addrem_flags(RVAL, RKEY); - _line_progressed(2); - } - else if(rem.begins_with(':')) - { - _c4dbgp("actually, the scalar is the first key of a map, and it opens a new scope"); - addrem_flags(RKEY, RVAL); // before _push_level! This prepares the current level for popping by setting it to RNXT - _push_level(); - _move_scalar_from_top(); - _move_val_anchor_to_key_anchor(); - _start_map(); - _save_indentation(/*behind*/s.len); - addrem_flags(RVAL, RKEY); - _line_progressed(1); - } - else + const id_type prev = tree.prev_sibling(node); + if(prev != NONE) { - _c4dbgp("appending keyval to current map"); - _append_key_val(s, is_quoted); - addrem_flags(RKEY, RVAL); + if(_location_from_node(tree, prev, loc, level+1)) + return true; } - return true; - } - else if(rem.begins_with("- ") _RYML_WITH_TAB_TOKENS( || rem.begins_with("-\t"))) - { - _c4dbgp("val is a nested seq, indented"); - addrem_flags(RKEY, RVAL); // before _push_level! - _push_level(); - _move_scalar_from_top(); - _start_seq(); - _save_indentation(); - _line_progressed(2); - return true; - } - else if(rem == '-') - { - _c4dbgp("maybe a seq. start unknown, indented"); - _start_unk(); - _save_indentation(); - _line_progressed(1); - return true; - } - else if(rem.begins_with('[')) - { - _c4dbgp("val is a child seq, flow"); - addrem_flags(RKEY, RVAL); // before _push_level! - _push_level(/*explicit flow*/true); - _move_scalar_from_top(); - _start_seq(); - add_flags(FLOW); - _line_progressed(1); - return true; - } - else if(rem.begins_with('{')) - { - _c4dbgp("val is a child map, flow"); - addrem_flags(RKEY, RVAL); // before _push_level! 
- _push_level(/*explicit flow*/true); - _move_scalar_from_top(); - _start_map(); - addrem_flags(FLOW|RKEY, RVAL); - _line_progressed(1); - return true; } - else if(rem.begins_with(' ')) + // try the next sibling { - csubstr spc = rem.left_of(rem.first_not_of(' ')); - if(_at_line_begin()) - { - _c4dbgpf("skipping value indentation: {} spaces", spc.len); - _line_progressed(spc.len); - return true; - } - else + const id_type next = tree.next_sibling(node); + if(next != NONE) { - _c4dbgpf("skipping {} spaces", spc.len); - _line_progressed(spc.len); - return true; + if(_location_from_node(tree, next, loc, level+1)) + return true; } } - else if(_handle_types()) - { - return true; - } - else if(_handle_val_anchors_and_refs()) - { - return true; - } - else if(rem.begins_with("--- ") || rem == "---" || rem.begins_with("---\t")) - { - _start_new_doc(rem); - return true; - } - else if(rem.begins_with("...")) - { - _c4dbgp("end current document"); - _end_stream(); - _line_progressed(3); - return true; - } - else + // try the parent { - _c4err("parse error"); + const id_type parent = tree.parent(node); + if(parent != NONE) + { + if(_location_from_node(tree, parent, loc, level+1)) + return true; + } } } - else - { - _c4err("internal error"); - } return false; } - -//----------------------------------------------------------------------------- -bool Parser::_handle_top() +template +bool ParseEngine::_location_from_cont(Tree const& tree, id_type node, Location *C4_RESTRICT loc) const { - _c4dbgp("handle_top"); - csubstr rem = m_state->line_contents.rem; - - if(rem.begins_with('#')) - { - _c4dbgp("a comment line"); - _scan_comment(); - return true; - } - - csubstr trimmed = rem.triml(' '); - - if(trimmed.begins_with('%')) - { - _handle_directive(trimmed); - _line_progressed(rem.len); - return true; - } - else if(trimmed.begins_with("--- ") || trimmed == "---" || trimmed.begins_with("---\t")) - { - _start_new_doc(rem); - if(trimmed.len < rem.len) - { - _line_progressed(rem.len - 
trimmed.len); - _save_indentation(); - } - return true; - } - else if(trimmed.begins_with("...")) + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, tree.is_container(node)); + if(!tree.is_stream(node)) { - _c4dbgp("end current document"); - _end_stream(); - if(trimmed.len < rem.len) + const char *node_start = tree._p(node)->m_val.scalar.str; // this was stored in the container + if(tree.has_children(node)) { - _line_progressed(rem.len - trimmed.len); + id_type child = tree.first_child(node); + if(tree.has_key(child)) + { + // when a map starts, the container was set after the key + csubstr k = tree.key(child); + if(k.str && node_start > k.str) + node_start = k.str; + } } - _line_progressed(3); + *loc = val_location(node_start); return true; } - else + else // it's a stream { - _c4err("parse error"); + *loc = val_location(m_buf.str); // just return the front of the buffer } - - return false; + return true; } -//----------------------------------------------------------------------------- - -bool Parser::_handle_key_anchors_and_refs() +template +Location ParseEngine::val_location(const char *val) const { - _RYML_CB_ASSERT(m_stack.m_callbacks, !has_any(RVAL)); - const csubstr rem = m_state->line_contents.rem; - if(rem.begins_with('&')) + if(C4_UNLIKELY(val == nullptr)) + return {m_file, 0, 0, 0}; + _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_options.locations()); + // NOTE: if any of these checks fails, the parser needs to be + // instantiated with locations enabled. 
+ _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.str == m_newline_offsets_buf.str); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.len == m_newline_offsets_buf.len); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_options.locations()); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !_locations_dirty()); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_newline_offsets != nullptr); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_size > 0); + // NOTE: the pointer needs to belong to the buffer that was used to parse. + csubstr src = m_buf; + _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, val != nullptr || src.str == nullptr); + _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, (val >= src.begin() && val <= src.end()) || (src.str == nullptr && val == nullptr)); + // ok. search the first stored newline after the given ptr + using lineptr_type = size_t const* C4_RESTRICT; + lineptr_type lineptr = nullptr; + size_t offset = (size_t)(val - src.begin()); + if(m_newline_offsets_size < RYML_LOCATIONS_SMALL_THRESHOLD) { - _c4dbgp("found a key anchor!!!"); - if(has_all(QMRK|SSCL)) + // just do a linear search if the size is small. 
+ for(lineptr_type curr = m_newline_offsets, last = m_newline_offsets + m_newline_offsets_size; curr < last; ++curr) { - _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RKEY)); - _c4dbgp("there is a stored key, so this anchor is for the next element"); - _append_key_val_null(rem.str - 1); - rem_flags(QMRK); - return true; + if(*curr > offset) + { + lineptr = curr; + break; + } } - csubstr anchor = rem.left_of(rem.first_of(' ')); - _line_progressed(anchor.len); - anchor = anchor.sub(1); // skip the first character - _move_key_anchor_to_val_anchor(); - _c4dbgpf("key anchor value: '{}'", anchor); - m_key_anchor = anchor; - m_key_anchor_indentation = m_state->line_contents.current_col(rem); - return true; - } - else if(C4_UNLIKELY(rem.begins_with('*'))) - { - _c4err("not implemented - this should have been catched elsewhere"); - C4_NEVER_REACH(); - return false; } - return false; -} - -bool Parser::_handle_val_anchors_and_refs() -{ - _RYML_CB_ASSERT(m_stack.m_callbacks, !has_any(RKEY)); - const csubstr rem = m_state->line_contents.rem; - if(rem.begins_with('&')) + else { - csubstr anchor = rem.left_of(rem.first_of(' ')); - _line_progressed(anchor.len); - anchor = anchor.sub(1); // skip the first character - _c4dbgpf("val: found an anchor: '{}', indentation={}!!!", anchor, m_state->line_contents.current_col(rem)); - if(m_val_anchor.empty()) - { - _c4dbgpf("save val anchor: '{}'", anchor); - m_val_anchor = anchor; - m_val_anchor_indentation = m_state->line_contents.current_col(rem); - } - else + // do a bisection search if the size is not small. + // + // We could use std::lower_bound but this is simple enough and + // spares the costly include of . 
+ size_t count = m_newline_offsets_size; + size_t step; + lineptr_type it; + lineptr = m_newline_offsets; + while(count) { - _c4dbgpf("there is a pending val anchor '{}'", m_val_anchor); - if(m_tree->is_seq(m_state->node_id)) + step = count >> 1; + it = lineptr + step; + if(*it < offset) { - if(m_tree->has_children(m_state->node_id)) - { - _c4dbgpf("current node={} is a seq, has {} children", m_state->node_id, m_tree->num_children(m_state->node_id)); - _c4dbgpf("... so take the new one as a key anchor '{}'", anchor); - m_key_anchor = anchor; - m_key_anchor_indentation = m_state->line_contents.current_col(rem); - } - else - { - _c4dbgpf("current node={} is a seq, has no children", m_state->node_id); - if(m_tree->has_val_anchor(m_state->node_id)) - { - _c4dbgpf("... node={} already has val anchor: '{}'", m_state->node_id, m_tree->val_anchor(m_state->node_id)); - _c4dbgpf("... so take the new one as a key anchor '{}'", anchor); - m_key_anchor = anchor; - m_key_anchor_indentation = m_state->line_contents.current_col(rem); - } - else - { - _c4dbgpf("... 
so set pending val anchor: '{}' on current node {}", m_val_anchor, m_state->node_id); - m_tree->set_val_anchor(m_state->node_id, m_val_anchor); - m_val_anchor = anchor; - m_val_anchor_indentation = m_state->line_contents.current_col(rem); - } - } + lineptr = ++it; + count -= step + 1; + } + else + { + count = step; } } - return true; - } - else if(C4_UNLIKELY(rem.begins_with('*'))) - { - _c4err("not implemented - this should have been catched elsewhere"); - C4_NEVER_REACH(); - return false; } - return false; + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, lineptr >= m_newline_offsets); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, lineptr <= m_newline_offsets + m_newline_offsets_size); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, *lineptr > offset); + Location loc; + loc.name = m_file; + loc.offset = offset; + loc.line = (size_t)(lineptr - m_newline_offsets); + if(lineptr > m_newline_offsets) + loc.col = (offset - *(lineptr-1) - 1u); + else + loc.col = offset; + return loc; } -void Parser::_move_key_anchor_to_val_anchor() +template +void ParseEngine::_prepare_locations() { - if(m_key_anchor.empty()) - return; - _c4dbgpf("move current key anchor to val slot: key='{}' -> val='{}'", m_key_anchor, m_val_anchor); - if(!m_val_anchor.empty()) - _c4err("triple-pending anchor"); - m_val_anchor = m_key_anchor; - m_val_anchor_indentation = m_key_anchor_indentation; - m_key_anchor = {}; - m_key_anchor_indentation = {}; + m_newline_offsets_buf = m_buf; + size_t numnewlines = 1u + m_buf.count('\n'); + _resize_locations(numnewlines); + m_newline_offsets_size = 0; + for(size_t i = 0; i < m_buf.len; i++) + if(m_buf[i] == '\n') + m_newline_offsets[m_newline_offsets_size++] = i; + m_newline_offsets[m_newline_offsets_size++] = m_buf.len; + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_size == numnewlines); } -void Parser::_move_val_anchor_to_key_anchor() +template +void ParseEngine::_resize_locations(size_t numnewlines) { - 
if(m_val_anchor.empty()) - return; - if(!_token_is_from_this_line(m_val_anchor)) - return; - _c4dbgpf("move current val anchor to key slot: key='{}' <- val='{}'", m_key_anchor, m_val_anchor); - if(!m_key_anchor.empty()) - _c4err("triple-pending anchor"); - m_key_anchor = m_val_anchor; - m_key_anchor_indentation = m_val_anchor_indentation; - m_val_anchor = {}; - m_val_anchor_indentation = {}; + if(numnewlines > m_newline_offsets_capacity) + { + if(m_newline_offsets) + _RYML_CB_FREE(m_evt_handler->m_stack.m_callbacks, m_newline_offsets, size_t, m_newline_offsets_capacity); + m_newline_offsets = _RYML_CB_ALLOC_HINT(m_evt_handler->m_stack.m_callbacks, size_t, numnewlines, m_newline_offsets); + m_newline_offsets_capacity = numnewlines; + } } -void Parser::_move_key_tag_to_val_tag() +template +bool ParseEngine::_locations_dirty() const { - if(m_key_tag.empty()) - return; - _c4dbgpf("move key tag to val tag: key='{}' -> val='{}'", m_key_tag, m_val_tag); - m_val_tag = m_key_tag; - m_val_tag_indentation = m_key_tag_indentation; - m_key_tag.clear(); - m_key_tag_indentation = 0; + return !m_newline_offsets_size; } -void Parser::_move_val_tag_to_key_tag() -{ - if(m_val_tag.empty()) - return; - if(!_token_is_from_this_line(m_val_tag)) - return; - _c4dbgpf("move val tag to key tag: key='{}' <- val='{}'", m_key_tag, m_val_tag); - m_key_tag = m_val_tag; - m_key_tag_indentation = m_val_tag_indentation; - m_val_tag.clear(); - m_val_tag_indentation = 0; -} -void Parser::_move_key_tag2_to_key_tag() +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +template +void ParseEngine::_handle_flow_skip_whitespace() { - if(m_key_tag2.empty()) - return; - _c4dbgpf("move key tag2 to key tag: key='{}' <- key2='{}'", m_key_tag, m_key_tag2); - m_key_tag = m_key_tag2; - m_key_tag_indentation = 
m_key_tag2_indentation; - m_key_tag2.clear(); - m_key_tag2_indentation = 0; + if(m_evt_handler->m_curr->line_contents.rem.len > 0) + { + csubstr rem = m_evt_handler->m_curr->line_contents.rem; + if(rem.str[0] == ' ' || rem.str[0] == '\t') + { + _c4dbgpf("starts with whitespace: '{}'", _c4prc(rem.str[0])); + _skipchars(" \t"); + rem = m_evt_handler->m_curr->line_contents.rem; + } + // comments + if(rem.begins_with('#')) + { + _c4dbgpf("it's a comment: {}", m_evt_handler->m_curr->line_contents.rem); + _line_progressed(m_evt_handler->m_curr->line_contents.rem.len); + } + } } //----------------------------------------------------------------------------- -bool Parser::_handle_types() + +template +void ParseEngine::_add_annotation(Annotation *C4_RESTRICT dst, csubstr str, size_t indentation, size_t line) +{ + _c4dbgpf("store annotation[{}]: '{}' indentation={} line={}", dst->num_entries, str, indentation, line); + if(C4_UNLIKELY(dst->num_entries >= C4_COUNTOF(dst->annotations))) + _c4err("too many annotations"); + dst->annotations[dst->num_entries].str = str; + dst->annotations[dst->num_entries].indentation = indentation; + dst->annotations[dst->num_entries].line = line; + ++dst->num_entries; +} + +template +void ParseEngine::_clear_annotations(Annotation *C4_RESTRICT dst) { - csubstr rem = m_state->line_contents.rem.triml(' '); - csubstr t; + dst->num_entries = 0; +} - if(rem.begins_with("!!")) - { - _c4dbgp("begins with '!!'"); - t = rem.left_of(rem.first_of(" ,")); - _RYML_CB_ASSERT(m_stack.m_callbacks, t.len >= 2); - //t = t.sub(2); - if(t == "!!set") - add_flags(RSET); - } - else if(rem.begins_with("!<")) - { - _c4dbgp("begins with '!<'"); - t = rem.left_of(rem.first_of('>'), true); - _RYML_CB_ASSERT(m_stack.m_callbacks, t.len >= 2); - //t = t.sub(2, t.len-1); - } - else if(rem.begins_with("!h!")) - { - _c4dbgp("begins with '!h!'"); - t = rem.left_of(rem.first_of(' ')); - _RYML_CB_ASSERT(m_stack.m_callbacks, t.len >= 3); - //t = t.sub(3); - } - else 
if(rem.begins_with('!')) +#ifdef RYML_NO_COVERAGE__TO_BE_DELETED +template +bool ParseEngine::_handle_indentation_from_annotations() +{ + if(m_pending_anchors.num_entries == 1u || m_pending_tags.num_entries == 1u) { - _c4dbgp("begins with '!'"); - t = rem.left_of(rem.first_of(' ')); - _RYML_CB_ASSERT(m_stack.m_callbacks, t.len >= 1); - //t = t.sub(1); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_anchors.num_entries < 2u && m_pending_tags.num_entries < 2u); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_anchors.annotations[0].line < m_evt_handler->m_curr->pos.line); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_tags.annotations[1].line < m_evt_handler->m_curr->pos.line); + size_t to_skip = m_evt_handler->m_curr->indref; + if(m_pending_anchors.num_entries) + to_skip = m_pending_anchors.annotations[0].indentation > to_skip ? m_pending_anchors.annotations[0].indentation : to_skip; + if(m_pending_tags.num_entries) + to_skip = m_pending_tags.annotations[0].indentation > to_skip ? 
m_pending_tags.annotations[0].indentation : to_skip; + _c4dbgpf("annotations pending, skip indentation up to {}!", to_skip); + _maybe_skipchars_up_to(' ', to_skip); + return true; } + return false; +} +#endif - if(t.empty()) - return false; +template +bool ParseEngine::_annotations_require_key_container() const +{ + return m_pending_tags.num_entries > 1 || m_pending_anchors.num_entries > 1; +} - if(has_all(QMRK|SSCL)) +template +void ParseEngine::_check_tag(csubstr tag) +{ + if(!tag.begins_with("!<")) { - _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RKEY)); - _c4dbgp("there is a stored key, so this tag is for the next element"); - _append_key_val_null(rem.str - 1); - rem_flags(QMRK); + if(C4_UNLIKELY(tag.first_of("[]{},") != npos)) + _RYML_CB_ERR_(m_evt_handler->m_stack.m_callbacks, "tags must not contain any of '[]{},'", m_evt_handler->m_curr->pos); } - - #ifdef RYML_NO_COVERAGE__TO_BE_DELETED - const char *tag_beginning = rem.str; - #endif - size_t tag_indentation = m_state->line_contents.current_col(t); - _c4dbgpf("there was a tag: '{}', indentation={}", t, tag_indentation); - _RYML_CB_ASSERT(m_stack.m_callbacks, t.end() > m_state->line_contents.rem.begin()); - _line_progressed(static_cast(t.end() - m_state->line_contents.rem.begin())); + else { - size_t pos = m_state->line_contents.rem.first_not_of(" \t"); - if(pos != csubstr::npos) - _line_progressed(pos); + if(C4_UNLIKELY(!tag.ends_with('>'))) + _RYML_CB_ERR_(m_evt_handler->m_stack.m_callbacks, "malformed tag", m_evt_handler->m_curr->pos); } +} - if(has_all(RMAP|RKEY)) - { - _c4dbgpf("saving map key tag '{}'", t); - _RYML_CB_ASSERT(m_stack.m_callbacks, m_key_tag.empty()); - m_key_tag = t; - m_key_tag_indentation = tag_indentation; - } - else if(has_all(RMAP|RVAL)) +template +void ParseEngine::_handle_annotations_before_blck_key_scalar() +{ + _c4dbgpf("annotations_before_blck_key_scalar, node={}", m_evt_handler->m_curr->node_id); + if(m_pending_tags.num_entries) { - /* foo: !!str - * !!str : bar */ - rem = 
m_state->line_contents.rem; - rem = rem.left_of(rem.find("#")); - rem = rem.trimr(" \t"); - _c4dbgpf("rem='{}'", rem); - #ifdef RYML_NO_COVERAGE__TO_BE_DELETED - if(rem == ':' || rem.begins_with(": ")) + _c4dbgpf("annotations_before_blck_key_scalar, #tags={}", m_pending_tags.num_entries); + if(C4_LIKELY(m_pending_tags.num_entries == 1)) { - _c4dbgp("the last val was null, and this is a tag from a null key"); - _append_key_val_null(tag_beginning - 1); - _store_scalar_null(rem.str - 1); - // do not change the flag to key, it is ~ - _RYML_CB_ASSERT(m_stack.m_callbacks, rem.begin() > m_state->line_contents.rem.begin()); - size_t token_len = rem == ':' ? 1 : 2; - _line_progressed(static_cast(token_len + rem.begin() - m_state->line_contents.rem.begin())); + _check_tag(m_pending_tags.annotations[0].str); + m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str); + _clear_annotations(&m_pending_tags); + } + else + { + _c4err("too many tags"); } - #endif - _c4dbgpf("saving map val tag '{}'", t); - _RYML_CB_ASSERT(m_stack.m_callbacks, m_val_tag.empty()); - m_val_tag = t; - m_val_tag_indentation = tag_indentation; } - else if(has_all(RSEQ|RVAL) || has_all(RTOP|RUNK|NDOC)) + if(m_pending_anchors.num_entries) { - if(m_val_tag.empty()) + _c4dbgpf("annotations_before_blck_key_scalar, #anchors={}", m_pending_anchors.num_entries); + if(C4_LIKELY(m_pending_anchors.num_entries == 1)) { - _c4dbgpf("saving seq/doc val tag '{}'", t); - m_val_tag = t; - m_val_tag_indentation = tag_indentation; + m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str); + _clear_annotations(&m_pending_anchors); } else { - _c4dbgpf("saving seq/doc key tag '{}'", t); - m_key_tag = t; - m_key_tag_indentation = tag_indentation; + _c4err("too many anchors"); } } - else if(has_all(RTOP|RUNK) || has_any(RUNK)) +} + +template +void ParseEngine::_handle_annotations_before_blck_val_scalar() +{ + _c4dbgpf("annotations_before_blck_val_scalar, node={}", m_evt_handler->m_curr->node_id); + 
if(m_pending_tags.num_entries) { - rem = m_state->line_contents.rem; - rem = rem.left_of(rem.find("#")); - rem = rem.trimr(" \t"); - if(rem.empty()) + _c4dbgpf("annotations_before_blck_val_scalar, #tags={}", m_pending_tags.num_entries); + if(C4_LIKELY(m_pending_tags.num_entries == 1)) { - _c4dbgpf("saving val tag '{}'", t); - _RYML_CB_ASSERT(m_stack.m_callbacks, m_val_tag.empty()); - m_val_tag = t; - m_val_tag_indentation = tag_indentation; + _check_tag(m_pending_tags.annotations[0].str); + m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str); + _clear_annotations(&m_pending_tags); } else { - _c4dbgpf("saving key tag '{}'", t); - if(m_key_tag.empty()) - { - m_key_tag = t; - m_key_tag_indentation = tag_indentation; - } - else - { - /* handle this case: - * !!str foo: !!map - * !!int 1: !!float 20.0 - * !!int 3: !!float 40.0 - * - * (m_key_tag would be !!str and m_key_tag2 would be !!int) - */ - m_key_tag2 = t; - m_key_tag2_indentation = tag_indentation; - } + _c4err("too many tags"); } } - else - { - _c4err("internal error"); - } - - if(m_val_tag.not_empty()) + if(m_pending_anchors.num_entries) { - YamlTag_e tag = to_tag(t); - if(tag == TAG_STR) + _c4dbgpf("annotations_before_blck_val_scalar, #anchors={}", m_pending_anchors.num_entries); + if(C4_LIKELY(m_pending_anchors.num_entries == 1)) { - _c4dbgpf("tag '{}' is a str-type tag", t); - if(has_all(RTOP|RUNK|NDOC)) - { - _c4dbgpf("docval. slurping the string. pos={}", m_state->pos.offset); - csubstr scalar = _slurp_doc_scalar(); - _c4dbgpf("docval. after slurp: {}, at node {}: '{}'", m_state->pos.offset, m_state->node_id, scalar); - m_tree->to_val(m_state->node_id, scalar, DOC); - _c4dbgpf("docval. 
val tag {} -> {}", m_val_tag, normalize_tag(m_val_tag)); - m_tree->set_val_tag(m_state->node_id, normalize_tag(m_val_tag)); - m_val_tag.clear(); - if(!m_val_anchor.empty()) - { - _c4dbgpf("setting val anchor[{}]='{}'", m_state->node_id, m_val_anchor); - m_tree->set_val_anchor(m_state->node_id, m_val_anchor); - m_val_anchor.clear(); - } - _end_stream(); - } + m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str); + _clear_annotations(&m_pending_anchors); + } + else + { + _c4err("too many anchors"); } } - return true; } -//----------------------------------------------------------------------------- -csubstr Parser::_slurp_doc_scalar() +template +void ParseEngine::_handle_annotations_before_start_mapblck(size_t current_line) { - csubstr s = m_state->line_contents.rem; - size_t pos = m_state->pos.offset; - _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->line_contents.full.find("---") != csubstr::npos); - _c4dbgpf("slurp 0 '{}'. REM='{}'", s, m_buf.sub(m_state->pos.offset)); - if(s.len == 0) - { - _line_ended(); - _scan_line(); - s = m_state->line_contents.rem; - pos = m_state->pos.offset; - } - - size_t skipws = s.first_not_of(" \t"); - _c4dbgpf("slurp 1 '{}'. REM='{}'", s, m_buf.sub(m_state->pos.offset)); - if(skipws != npos) + _c4dbgpf("annotations_before_start_mapblck, current_line={}", current_line); + if(m_pending_tags.num_entries == 2) { - _line_progressed(skipws); - s = m_state->line_contents.rem; - pos = m_state->pos.offset; - _c4dbgpf("slurp 2 '{}'. REM='{}'", s, m_buf.sub(m_state->pos.offset)); + _c4dbgp("2 tags, setting entry 0"); + _check_tag(m_pending_tags.annotations[0].str); + m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str); } - - _RYML_CB_ASSERT(m_stack.m_callbacks, m_val_anchor.empty()); - _handle_val_anchors_and_refs(); - if(!m_val_anchor.empty()) + else if(m_pending_tags.num_entries == 1) { - s = m_state->line_contents.rem; - skipws = s.first_not_of(" \t"); - if(skipws != npos) + _c4dbgpf("1 tag. 
line={}, curr={}", m_pending_tags.annotations[0].line); + if(m_pending_tags.annotations[0].line < current_line) { - _line_progressed(skipws); + _c4dbgp("...tag is for the map. setting it."); + _check_tag(m_pending_tags.annotations[0].str); + m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str); + _clear_annotations(&m_pending_tags); } - s = m_state->line_contents.rem; - pos = m_state->pos.offset; - _c4dbgpf("slurp 3 '{}'. REM='{}'", s, m_buf.sub(m_state->pos.offset)); } - - if(s.begins_with('\'')) + // + if(m_pending_anchors.num_entries == 2) { - m_state->scalar_col = m_state->line_contents.current_col(s); - return _scan_squot_scalar(); + _c4dbgp("2 anchors, setting entry 0"); + m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str); } - else if(s.begins_with('"')) + else if(m_pending_anchors.num_entries == 1) { - m_state->scalar_col = m_state->line_contents.current_col(s); - return _scan_dquot_scalar(); + _c4dbgpf("1 anchor. line={}, curr={}", m_pending_anchors.annotations[0].line); + if(m_pending_anchors.annotations[0].line < current_line) + { + _c4dbgp("...anchor is for the map. setting it."); + m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str); + _clear_annotations(&m_pending_anchors); + } } - else if(s.begins_with('|') || s.begins_with('>')) +} + +template +void ParseEngine::_handle_annotations_before_start_mapblck_as_key() +{ + _c4dbgp("annotations_before_start_mapblck_as_key"); + if(m_pending_tags.num_entries == 2) { - return _scan_block(); + _check_tag(m_pending_tags.annotations[0].str); + m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str); } - - _c4dbgpf("slurp 4 '{}'. REM='{}'", s, m_buf.sub(m_state->pos.offset)); - - m_state->scalar_col = m_state->line_contents.current_col(s); - _RYML_CB_ASSERT(m_stack.m_callbacks, s.end() >= m_buf.begin() + pos); - _line_progressed(static_cast(s.end() - (m_buf.begin() + pos))); - - _c4dbgpf("slurp 5 '{}'. 
REM='{}'", s, m_buf.sub(m_state->pos.offset)); - - if(_at_line_end()) + if(m_pending_anchors.num_entries == 2) { - _c4dbgpf("at line end. curr='{}'", s); - s = _extend_scanned_scalar(s); + m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str); } - - _c4dbgpf("scalar was '{}'", s); - - return s; } - -//----------------------------------------------------------------------------- - -bool Parser::_scan_scalar_seq_blck(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted) +template +void ParseEngine::_handle_annotations_and_indentation_after_start_mapblck(size_t key_indentation, size_t key_line) { - _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RSEQ)); - _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RVAL)); - _RYML_CB_ASSERT(m_stack.m_callbacks, ! has_any(RKEY)); - _RYML_CB_ASSERT(m_stack.m_callbacks, ! has_any(FLOW)); - - csubstr s = m_state->line_contents.rem; - if(s.len == 0) - return false; - s = s.trim(" \t"); - if(s.len == 0) - return false; - - if(s.begins_with('\'')) - { - _c4dbgp("got a ': scanning single-quoted scalar"); - m_state->scalar_col = m_state->line_contents.current_col(s); - *scalar = _scan_squot_scalar(); - *quoted = true; - return true; - } - else if(s.begins_with('"')) + _c4dbgp("annotations_after_start_mapblck"); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_tags.num_entries <= 2); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_anchors.num_entries <= 2); + if(m_pending_anchors.num_entries || m_pending_tags.num_entries) { - _c4dbgp("got a \": scanning double-quoted scalar"); - m_state->scalar_col = m_state->line_contents.current_col(s); - *scalar = _scan_dquot_scalar(); - *quoted = true; - return true; + key_indentation = _select_indentation_from_annotations(key_indentation, key_line); + switch(m_pending_tags.num_entries) + { + case 1u: + _check_tag(m_pending_tags.annotations[0].str); + m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str); + _clear_annotations(&m_pending_tags); + break; + 
case 2u: + _check_tag(m_pending_tags.annotations[1].str); + m_evt_handler->set_key_tag(m_pending_tags.annotations[1].str); + _clear_annotations(&m_pending_tags); + break; + } + switch(m_pending_anchors.num_entries) + { + case 1u: + m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str); + _clear_annotations(&m_pending_anchors); + break; + case 2u: + m_evt_handler->set_key_anchor(m_pending_anchors.annotations[1].str); + _clear_annotations(&m_pending_anchors); + break; + } } - else if(s.begins_with('|') || s.begins_with('>')) + _set_indentation(key_indentation); +} + +template +size_t ParseEngine::_select_indentation_from_annotations(size_t val_indentation, size_t val_line) +{ + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_tags.num_entries || m_pending_anchors.num_entries); + // select the left-most annotation on the max line + auto const *C4_RESTRICT curr = m_pending_anchors.num_entries ? &m_pending_anchors.annotations[0] : &m_pending_tags.annotations[0]; + for(size_t i = 0; i < m_pending_anchors.num_entries; ++i) { - *scalar = _scan_block(); - *quoted = true; - return true; + auto const& C4_RESTRICT ann = m_pending_anchors.annotations[i]; + if(ann.line > curr->line) + curr = &ann; + else if(ann.indentation < curr->indentation) + curr = &ann; } - else if(has_any(RTOP) && _is_doc_sep(s)) + for(size_t j = 0; j < m_pending_tags.num_entries; ++j) { - return false; + auto const& C4_RESTRICT ann = m_pending_tags.annotations[j]; + if(ann.line > curr->line) + curr = &ann; + else if(ann.indentation < curr->indentation) + curr = &ann; } + return curr->line < val_line ? val_indentation : curr->indentation; +} - _c4dbgp("RSEQ|RVAL"); - if( ! 
_is_scalar_next__rseq_rval(s)) - return false; - _RYML_WITH_TAB_TOKENS(else if(s.begins_with("-\t")) - return false; - ) - - if(s.ends_with(':')) +template +void ParseEngine::_handle_directive(csubstr rem) +{ + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.is_sub(m_evt_handler->m_curr->line_contents.rem)); + const size_t pos = rem.find('#'); + _c4dbgpf("handle_directive: pos={} rem={}", pos, rem); + if(pos == npos) // no comments { - --s.len; + m_evt_handler->add_directive(rem); + _line_progressed(rem.len); } else { - auto first = s.first_of_any(": " _RYML_WITH_TAB_TOKENS( , ":\t"), " #"); - if(first) - s.len = first.pos; - } - s = s.trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' ')); - - if(s.empty()) - return false; - - m_state->scalar_col = m_state->line_contents.current_col(s); - _RYML_CB_ASSERT(m_stack.m_callbacks, s.str >= m_state->line_contents.rem.str); - _line_progressed(static_cast(s.str - m_state->line_contents.rem.str) + s.len); - - if(_at_line_end() && s != '~') - { - _c4dbgpf("at line end. curr='{}'", s); - s = _extend_scanned_scalar(s); + csubstr to_comment = rem.first(pos); + csubstr trimmed = to_comment.trimr(" \t"); + m_evt_handler->add_directive(trimmed); + _line_progressed(pos); + _skip_comment(); } - - _c4dbgpf("scalar was '{}'", s); - - *scalar = s; - *quoted = false; - return true; } -bool Parser::_scan_scalar_map_blck(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted) -{ - _c4dbgp("_scan_scalar_map_blck"); - _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RMAP)); - _RYML_CB_ASSERT(m_stack.m_callbacks, ! 
has_any(FLOW)); - _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RKEY|RVAL)); - csubstr s = m_state->line_contents.rem; - #ifdef RYML_NO_COVERAGE__TO_BE_DELETED__OR_REFACTORED - if(s.len == 0) - return false; - #endif - s = s.trim(" \t"); - if(s.len == 0) - return false; +//----------------------------------------------------------------------------- - if(s.begins_with('\'')) - { - _c4dbgp("got a ': scanning single-quoted scalar"); - m_state->scalar_col = m_state->line_contents.current_col(s); - *scalar = _scan_squot_scalar(); - *quoted = true; - return true; - } - else if(s.begins_with('"')) - { - _c4dbgp("got a \": scanning double-quoted scalar"); - m_state->scalar_col = m_state->line_contents.current_col(s); - *scalar = _scan_dquot_scalar(); - *quoted = true; - return true; - } - else if(s.begins_with('|') || s.begins_with('>')) - { - *scalar = _scan_block(); - *quoted = true; - return true; - } - else if(has_any(RTOP) && _is_doc_sep(s)) - { - return false; - } +template +void ParseEngine::_handle_seq_json() +{ +seqjson_start: + _c4dbgpf("handle2_seq_json: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref); - if( ! 
_is_scalar_next__rmap(s)) - return false; + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RSEQ)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(FLOW)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RVAL) != has_all(RNXT)); - size_t colon_token = s.find(": "); - if(colon_token == npos) - { - _RYML_WITH_OR_WITHOUT_TAB_TOKENS( - // with tab tokens - colon_token = s.find(":\t"); - if(colon_token == npos) - { - _RYML_CB_ASSERT(m_stack.m_callbacks, s.len > 0); - colon_token = s.find(':'); - if(colon_token != s.len-1) - colon_token = npos; - } - , - // without tab tokens - colon_token = s.find(':'); - _RYML_CB_ASSERT(m_stack.m_callbacks, s.len > 0); - if(colon_token != s.len-1) - colon_token = npos; - ) - } + _handle_flow_skip_whitespace(); + csubstr rem = m_evt_handler->m_curr->line_contents.rem; + if(!rem.len) + goto seqjson_again; - if(has_all(RKEY)) + if(has_any(RVAL)) { - _RYML_CB_ASSERT(m_stack.m_callbacks, !s.begins_with(' ')); - if(has_any(QMRK)) + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT)); + const char first = rem.str[0]; + _c4dbgpf("mapjson[RVAL]: '{}'", first); + switch(first) { - _c4dbgp("RMAP|RKEY|CPLX"); - _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RMAP)); - if(s.begins_with("? 
") || s == '?') - return false; - s = s.left_of(colon_token); - s = s.left_of(s.first_of("#")); - s = s.trimr(" \t"); - if(s.begins_with("---")) - return false; - else if(s.begins_with("...")) - return false; + case '"': + { + _c4dbgp("seqjson[RVAL]: scanning double-quoted scalar"); + ScannedScalar sc = _scan_scalar_dquot(); + csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc); + m_evt_handler->set_val_scalar_dquoted(maybe_filtered); + addrem_flags(RNXT, RVAL); + break; } - else + case '[': { - _c4dbgp("RMAP|RKEY"); - _RYML_CB_CHECK(m_stack.m_callbacks, !s.begins_with('{')); - if(s.begins_with("? ") || s == '?') - return false; - s = s.left_of(colon_token); - s = s.trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' ')); - if(s.begins_with("---")) + _c4dbgp("seqjson[RVAL]: start child seqjson"); + addrem_flags(RNXT, RVAL); + m_evt_handler->begin_seq_val_flow(); + addrem_flags(RVAL, RNXT); + _line_progressed(1); + break; + } + case '{': + { + _c4dbgp("seqjson[RVAL]: start child mapjson"); + addrem_flags(RNXT, RVAL); + m_evt_handler->begin_map_val_flow(); + addrem_flags(RMAP|RKEY, RSEQ|RVAL|RNXT); + _line_progressed(1); + goto seqjson_finish; + } + case ']': // this happens on a trailing comma like ", ]" + { + _c4dbgp("seqjson[RVAL]: end!"); + rem_flags(RSEQ); + m_evt_handler->end_seq(); + _line_progressed(1); + if(!has_all(RSEQ|FLOW)) + goto seqjson_finish; + break; + } + default: + { + ScannedScalar sc; + if(_scan_scalar_seq_json(&sc)) { - return false; + _c4dbgp("seqjson[RVAL]: it's a plain scalar."); + csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref); + m_evt_handler->set_val_scalar_plain(maybe_filtered); + addrem_flags(RNXT, RVAL); } - else if(s.begins_with("...")) + else { - return false; + _c4err("parse error"); } } + } } - else if(has_all(RVAL)) + else // RNXT { - _c4dbgp("RMAP|RVAL"); - _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(QMRK)); - if( ! 
_is_scalar_next__rmap_val(s)) - return false; - _RYML_WITH_TAB_TOKENS( - else if(s.begins_with("-\t")) - return false; - ) - _c4dbgp("RMAP|RVAL: scalar"); - s = s.left_of(s.find(" #")); // is there a comment? - s = s.left_of(s.find("\t#")); // is there a comment? - s = s.trim(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' ')); - if(s.begins_with("---")) - return false; - #ifdef RYML_NO_COVERAGE__TO_BE_DELETED__OR_REFACTORED - else if(s.begins_with("...")) - return false; - #endif + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RNXT)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL)); + const char first = rem.str[0]; + _c4dbgpf("mapjson[RNXT]: '{}'", first); + switch(first) + { + case ',': + { + _c4dbgp("seqjson[RNXT]: expect next val"); + addrem_flags(RVAL, RNXT); + m_evt_handler->add_sibling(); + _line_progressed(1); + break; + } + case ']': + { + _c4dbgp("seqjson[RNXT]: end!"); + m_evt_handler->end_seq(); + _line_progressed(1); + goto seqjson_finish; + } + default: + _c4err("parse error"); + } } - if(s.empty()) - return false; - - m_state->scalar_col = m_state->line_contents.current_col(s); - _RYML_CB_ASSERT(m_stack.m_callbacks, s.str >= m_state->line_contents.rem.str); - _line_progressed(static_cast(s.str - m_state->line_contents.rem.str) + s.len); - - if(_at_line_end() && s != '~') + seqjson_again: + _c4dbgt("seqjson: go again", 0); + if(_finished_line()) { - _c4dbgpf("at line end. 
curr='{}'", s); - s = _extend_scanned_scalar(s); + if(C4_LIKELY(!_finished_file())) + { + _line_ended(); + _scan_line(); + _c4dbgnextline(); + } + else + { + _c4err("missing terminating ]"); + } } + goto seqjson_start; - _c4dbgpf("scalar was '{}'", s); - - *scalar = s; - *quoted = false; - return true; + seqjson_finish: + _c4dbgp("seqjson: finish"); } -bool Parser::_scan_scalar_seq_flow(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted) + +//----------------------------------------------------------------------------- + +template +void ParseEngine::_handle_map_json() { - _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RSEQ)); - _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(FLOW)); - _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RVAL)); - _RYML_CB_ASSERT(m_stack.m_callbacks, ! has_any(RKEY)); +mapjson_start: + _c4dbgpf("handle2_map_json: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref); - csubstr s = m_state->line_contents.rem; - if(s.len == 0) - return false; - s = s.trim(" \t"); - if(s.len == 0) - return false; + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RMAP)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(FLOW)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RKCL|RVAL|RNXT)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RKEY) + has_any(RKCL) + has_any(RVAL) + has_any(RNXT))); - if(s.begins_with('\'')) + _handle_flow_skip_whitespace(); + csubstr rem = m_evt_handler->m_curr->line_contents.rem; + if(!rem.len) + goto mapjson_again; + + if(has_any(RKEY)) { - _c4dbgp("got a ': scanning single-quoted scalar"); - m_state->scalar_col = m_state->line_contents.current_col(s); - *scalar = _scan_squot_scalar(); - *quoted = true; - return true; + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL)); + 
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK)); + const char first = rem.str[0]; + _c4dbgpf("mapjson[RKEY]: '{}'", first); + switch(first) + { + case '"': + { + _c4dbgp("mapjson[RKEY]: scanning double-quoted scalar"); + ScannedScalar sc = _scan_scalar_dquot(); + csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); + m_evt_handler->set_key_scalar_dquoted(maybe_filtered); + addrem_flags(RKCL, RKEY); + break; + } + case '}': // this happens on a trailing comma like ", }" + { + _c4dbgp("mapjson[RKEY]: end!"); + m_evt_handler->end_map(); + _line_progressed(1); + goto mapjson_finish; + } + default: + _c4err("parse error"); + } + } + else if(has_any(RVAL)) + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK)); + const char first = rem.str[0]; + _c4dbgpf("mapjson[RVAL]: '{}'", first); + switch(first) + { + case '"': + { + _c4dbgp("mapjson[RVAL]: scanning double-quoted scalar"); + ScannedScalar sc = _scan_scalar_dquot(); + csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc); + m_evt_handler->set_val_scalar_dquoted(maybe_filtered); + addrem_flags(RNXT, RVAL); + break; + } + case '[': + { + _c4dbgp("mapjson[RVAL]: start val seqjson"); + addrem_flags(RNXT, RVAL); + m_evt_handler->begin_seq_val_flow(); + _set_indentation(m_evt_handler->m_parent->indref); + addrem_flags(RSEQ|RVAL, RMAP|RNXT); + _line_progressed(1); + goto mapjson_finish; + } + case '{': + { + _c4dbgp("mapjson[RVAL]: start val mapjson"); + addrem_flags(RNXT, RVAL); + m_evt_handler->begin_map_val_flow(); + _set_indentation(m_evt_handler->m_parent->indref); + addrem_flags(RKEY, RNXT); + _line_progressed(1); + 
// keep going in this function + break; + } + default: + { + ScannedScalar sc; + if(_scan_scalar_map_json(&sc)) + { + _c4dbgp("mapjson[RVAL]: plain scalar."); + csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref); + m_evt_handler->set_val_scalar_plain(maybe_filtered); + addrem_flags(RNXT, RVAL); + } + else + { + _c4err("parse error"); + } + break; + } + } + } + else if(has_any(RKCL)) // read the key colon + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK)); + const char first = rem.str[0]; + _c4dbgpf("mapjson[RKCL]: '{}'", first); + if(first == ':') + { + _c4dbgp("mapjson[RKCL]: found the colon"); + addrem_flags(RVAL, RKCL); + _line_progressed(1); + } + else + { + _c4err("parse error"); + } } - else if(s.begins_with('"')) + else if(has_any(RNXT)) { - _c4dbgp("got a \": scanning double-quoted scalar"); - m_state->scalar_col = m_state->line_contents.current_col(s); - *scalar = _scan_dquot_scalar(); - *quoted = true; - return true; + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK)); + _c4dbgpf("mapjson[RNXT]: '{}'", rem.str[0]); + if(rem.begins_with(',')) + { + _c4dbgp("mapjson[RNXT]: expect next keyval"); + m_evt_handler->add_sibling(); + addrem_flags(RKEY, RNXT); + _line_progressed(1); + } + else if(rem.begins_with('}')) + { + _c4dbgp("mapjson[RNXT]: end!"); + m_evt_handler->end_map(); + _line_progressed(1); + goto mapjson_finish; + } + else + { + _c4err("parse error"); + } } - if(has_all(RVAL)) + mapjson_again: + _c4dbgt("mapjson: go again", 0); + if(_finished_line()) 
{ - _c4dbgp("RSEQ|RVAL"); - if( ! _is_scalar_next__rseq_rval(s)) - return false; - _RYML_WITH_TAB_TOKENS(else if(s.begins_with("-\t")) - return false; - ) - _c4dbgp("RSEQ|RVAL|FLOW"); - s = s.left_of(s.first_of(",]")); - if(s.ends_with(':')) + if(C4_LIKELY(!_finished_file())) { - --s.len; + _line_ended(); + _scan_line(); + _c4dbgnextline(); } else { - auto first = s.first_of_any(": " _RYML_WITH_TAB_TOKENS( , ":\t"), " #"); - if(first) - s.len = first.pos; + _c4err("missing terminating }"); } - s = s.trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' ')); } + goto mapjson_start; - if(s.empty()) - return false; - - m_state->scalar_col = m_state->line_contents.current_col(s); - _RYML_CB_ASSERT(m_stack.m_callbacks, s.str >= m_state->line_contents.rem.str); - _line_progressed(static_cast(s.str - m_state->line_contents.rem.str) + s.len); - - if(_at_line_end() && s != '~') - { - _c4dbgpf("at line end. curr='{}'", s); - s = _extend_scanned_scalar(s); - } + mapjson_finish: + _c4dbgp("mapjson: finish"); +} - _c4dbgpf("scalar was '{}'", s); - *scalar = s; - *quoted = false; - return true; -} +//----------------------------------------------------------------------------- -bool Parser::_scan_scalar_map_flow(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted) +template +void ParseEngine::_handle_seq_imap() { - _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RMAP)); - _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(FLOW)); - _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RKEY|RVAL)); +seqimap_start: + _c4dbgpf("handle2_seq_imap: node_id={} level={} indref={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref); - csubstr s = m_state->line_contents.rem; - if(s.len == 0) - return false; - s = s.trim(" \t"); - if(s.len == 0) - return false; + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RSEQIMAP)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 
has_any(RVAL|RNXT|QMRK|RKCL)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == has_all(RVAL) + has_all(RNXT) + has_all(QMRK) + has_all(RKCL)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 3); + + _handle_flow_skip_whitespace(); + csubstr rem = m_evt_handler->m_curr->line_contents.rem; + if(!rem.len) + goto seqimap_again; - if(s.begins_with('\'')) + if(has_any(RVAL)) { - _c4dbgp("got a ': scanning single-quoted scalar"); - m_state->scalar_col = m_state->line_contents.current_col(s); - *scalar = _scan_squot_scalar(); - *quoted = true; - return true; + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RVAL)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL)); + const char first = rem.str[0]; + _c4dbgpf("seqimap[RVAL]: '{}'", _c4prc(first)); + ScannedScalar sc; + if(first == '\'') + { + _c4dbgp("seqimap[RVAL]: scanning single-quoted scalar"); + sc = _scan_scalar_squot(); + csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc); + m_evt_handler->set_val_scalar_squoted(maybe_filtered); + m_evt_handler->end_map(); + goto seqimap_finish; + } + else if(first == '"') + { + _c4dbgp("seqimap[RVAL]: scanning double-quoted scalar"); + sc = _scan_scalar_dquot(); + csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc); + m_evt_handler->set_val_scalar_dquoted(maybe_filtered); + m_evt_handler->end_map(); + goto seqimap_finish; + } + // block scalars (ie | and >) cannot appear in flow containers + else if(_scan_scalar_plain_map_flow(&sc)) + { + _c4dbgp("seqimap[RVAL]: it's a scalar."); + csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref); + m_evt_handler->set_val_scalar_plain(maybe_filtered); + m_evt_handler->end_map(); + goto seqimap_finish; + } + else if(first == '[') + { + _c4dbgp("seqimap[RVAL]: start child 
seqflow"); + addrem_flags(RNXT, RVAL); + m_evt_handler->begin_seq_val_flow(); + addrem_flags(RVAL, RNXT|RSEQIMAP); + _set_indentation(m_evt_handler->m_parent->indref); + _line_progressed(1); + goto seqimap_finish; + } + else if(first == '{') + { + _c4dbgp("seqimap[RVAL]: start child mapflow"); + addrem_flags(RNXT, RVAL); + m_evt_handler->begin_map_val_flow(); + addrem_flags(RMAP|RKEY, RSEQ|RVAL|RSEQIMAP|RNXT); + _set_indentation(m_evt_handler->m_parent->indref); + _line_progressed(1); + goto seqimap_finish; + } + else if(first == ',' || first == ']') + { + _c4dbgp("seqimap[RVAL]: finish without val."); + m_evt_handler->set_val_scalar_plain({}); + m_evt_handler->end_map(); + goto seqimap_finish; + } + else if(first == '&') + { + csubstr anchor = _scan_anchor(); + _c4dbgp("seqimap[RVAL]: anchor!"); + m_evt_handler->set_val_anchor(anchor); + } + else if(first == '*') + { + csubstr ref = _scan_ref_seq(); + _c4dbgp("seqimap[RVAL]: ref!"); + m_evt_handler->set_val_ref(ref); + addrem_flags(RNXT, RVAL); + } + else + { + _c4err("parse error"); + } } - else if(s.begins_with('"')) + else if(has_any(RNXT)) { - _c4dbgp("got a \": scanning double-quoted scalar"); - m_state->scalar_col = m_state->line_contents.current_col(s); - *scalar = _scan_dquot_scalar(); - *quoted = true; - return true; + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RNXT)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL)); + const char first = rem.str[0]; + _c4dbgpf("seqimap[RNXT]: '{}'", _c4prc(first)); + if(first == ',' || first == ']') + { + // we may get here because a map or a seq started and we + // return later + _c4dbgp("seqimap: done"); + m_evt_handler->end_map(); + goto seqimap_finish; + } + else + { + _c4err("parse error"); + } } - - if( ! 
_is_scalar_next__rmap(s)) - return false; - - if(has_all(RKEY)) - { - _RYML_CB_ASSERT(m_stack.m_callbacks, !s.begins_with(' ')); - size_t colon_token = s.find(": "); - if(colon_token == npos) + else if(has_any(QMRK)) + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(QMRK)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL)); + const char first = rem.str[0]; + _c4dbgpf("seqimap[QMRK]: '{}'", _c4prc(first)); + ScannedScalar sc; + if(first == '\'') + { + _c4dbgp("seqimap[QMRK]: scanning single-quoted scalar"); + sc = _scan_scalar_squot(); + csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); + m_evt_handler->set_key_scalar_squoted(maybe_filtered); + addrem_flags(RKCL, QMRK); + goto seqimap_again; + } + else if(first == '"') + { + _c4dbgp("seqimap[QMRK]: scanning double-quoted scalar"); + sc = _scan_scalar_dquot(); + csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); + m_evt_handler->set_key_scalar_dquoted(maybe_filtered); + addrem_flags(RKCL, QMRK); + goto seqimap_again; + } + // block scalars (ie | and >) cannot appear in flow containers + else if(_scan_scalar_plain_map_flow(&sc)) + { + _c4dbgp("seqimap[QMRK]: it's a scalar."); + csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); + m_evt_handler->set_key_scalar_plain(maybe_filtered); + addrem_flags(RKCL, QMRK); + goto seqimap_again; + } + else if(first == '[') + { + _c4dbgp("seqimap[QMRK]: start child seqflow"); + addrem_flags(RKCL, QMRK); + m_evt_handler->begin_seq_key_flow(); + addrem_flags(RSEQ|RVAL, RKCL|RSEQIMAP); + _set_indentation(m_evt_handler->m_parent->indref); + _line_progressed(1); + goto seqimap_finish; + } + else if(first == '{') { - _RYML_WITH_OR_WITHOUT_TAB_TOKENS( - // with tab tokens - colon_token = s.find(":\t"); - if(colon_token == npos) - { - 
_RYML_CB_ASSERT(m_stack.m_callbacks, s.len > 0); - colon_token = s.find(':'); - if(colon_token != s.len-1) - colon_token = npos; - } - , - // without tab tokens - colon_token = s.find(':'); - _RYML_CB_ASSERT(m_stack.m_callbacks, s.len > 0); - if(colon_token != s.len-1) - colon_token = npos; - ) + _c4dbgp("seqimap[QMRK]: start child mapflow"); + addrem_flags(RKCL, QMRK); + m_evt_handler->begin_map_key_flow(); + addrem_flags(RMAP|RKEY, RSEQ|RKCL|RSEQIMAP); + _set_indentation(m_evt_handler->m_parent->indref); + _line_progressed(1); + goto seqimap_finish; } - if(s.begins_with("? ") || s == '?') - return false; - if(has_any(QMRK)) - { - _c4dbgp("RMAP|RKEY|CPLX"); - _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RMAP)); - s = s.left_of(colon_token); - s = s.left_of(s.first_of("#")); - s = s.left_of(s.first_of(':')); - s = s.trimr(" \t"); - if(s.begins_with("---")) - return false; - else if(s.begins_with("...")) - return false; + else if(first == ',' || first == ']') + { + _c4dbgp("seqimap[QMRK]: finish without key."); + m_evt_handler->set_key_scalar_plain({}); + m_evt_handler->set_val_scalar_plain({}); + m_evt_handler->end_map(); + goto seqimap_finish; + } + else if(first == '&') + { + csubstr anchor = _scan_anchor(); + _c4dbgp("seqimap[QMRK]: anchor!"); + m_evt_handler->set_key_anchor(anchor); + } + else if(first == '*') + { + csubstr ref = _scan_ref_seq(); + _c4dbgp("seqimap[QMRK]: ref!"); + m_evt_handler->set_key_ref(ref); + addrem_flags(RKCL, QMRK); } else { - _RYML_CB_CHECK(m_stack.m_callbacks, !s.begins_with('{')); - _c4dbgp("RMAP|RKEY"); - s = s.left_of(colon_token); - s = s.trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' ')); - _c4dbgpf("RMAP|RKEY|FLOW: '{}'", s); - s = s.left_of(s.first_of(",}")); - if(s.ends_with(':')) - --s.len; + _c4err("parse error"); } } - else if(has_all(RVAL)) + else if(has_any(RKCL)) { - _c4dbgp("RMAP|RVAL"); - _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(QMRK)); - if( ! 
_is_scalar_next__rmap_val(s)) - return false; - _RYML_WITH_TAB_TOKENS(else if(s.begins_with("-\t")) - return false; - ) - _c4dbgp("RMAP|RVAL|FLOW"); - if(has_none(RSEQIMAP)) - s = s.left_of(s.first_of(",}")); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKCL)); + const char first = rem.str[0]; + _c4dbgpf("seqimap[RKCL]: '{}'", _c4prc(first)); + if(first == ':') + { + _c4dbgp("seqimap[RKCL]: found ':'"); + addrem_flags(RVAL, RKCL); + _line_progressed(1); + goto seqimap_again; + } + else if(first == ',' || first == ']') + { + _c4dbgp("seqimap[RKCL]: found ','. finish without val"); + m_evt_handler->set_val_scalar_plain({}); + m_evt_handler->end_map(); + goto seqimap_finish; + } else - s = s.left_of(s.first_of(",]")); - s = s.left_of(s.find(" #")); // is there a comment? - s = s.left_of(s.find("\t#")); // is there a comment? - s = s.trim(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' ')); + { + _c4err("parse error"); + } } - if(s.empty()) - return false; - - m_state->scalar_col = m_state->line_contents.current_col(s); - _RYML_CB_ASSERT(m_stack.m_callbacks, s.str >= m_state->line_contents.rem.str); - _line_progressed(static_cast(s.str - m_state->line_contents.rem.str) + s.len); - - if(_at_line_end() && s != '~') + seqimap_again: + _c4dbgt("seqimap: go again", 0); + if(_finished_line()) { - _c4dbgpf("at line end. 
curr='{}'", s); - s = _extend_scanned_scalar(s); + if(C4_LIKELY(!_finished_file())) + { + _line_ended(); + _scan_line(); + _c4dbgnextline(); + } + else + { + _c4err("parse error"); + } } + goto seqimap_start; - _c4dbgpf("scalar was '{}'", s); - - *scalar = s; - *quoted = false; - return true; + seqimap_finish: + _c4dbgp("seqimap: finish"); } -bool Parser::_scan_scalar_unk(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted) + +//----------------------------------------------------------------------------- + +template +void ParseEngine::_handle_seq_flow() { - _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RUNK)); +seqflow_start: + _c4dbgpf("handle2_seq_flow: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref); - csubstr s = m_state->line_contents.rem; - if(s.len == 0) - return false; - s = s.trim(" \t"); - if(s.len == 0) - return false; + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RSEQ)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(FLOW)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RVAL) != has_all(RNXT)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indref != npos); - if(s.begins_with('\'')) - { - _c4dbgp("got a ': scanning single-quoted scalar"); - m_state->scalar_col = m_state->line_contents.current_col(s); - *scalar = _scan_squot_scalar(); - *quoted = true; - return true; - } - else if(s.begins_with('"')) - { - _c4dbgp("got a \": scanning double-quoted scalar"); - m_state->scalar_col = m_state->line_contents.current_col(s); - *scalar = _scan_dquot_scalar(); - *quoted = true; - return true; - } - else if(s.begins_with('|') || s.begins_with('>')) - { - *scalar = _scan_block(); - *quoted = true; - return true; - } - else if(has_any(RTOP) && _is_doc_sep(s)) + 
_handle_flow_skip_whitespace(); + csubstr rem = m_evt_handler->m_curr->line_contents.rem; + if(!rem.len) + goto seqflow_again; + + if(has_any(RVAL)) { - return false; + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT)); + const char first = rem.str[0]; + ScannedScalar sc; + if(first == '\'') + { + _c4dbgp("seqflow[RVAL]: scanning single-quoted scalar"); + sc = _scan_scalar_squot(); + csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc); + m_evt_handler->set_val_scalar_squoted(maybe_filtered); + addrem_flags(RNXT, RVAL); + } + else if(first == '"') + { + _c4dbgp("seqflow[RVAL]: scanning double-quoted scalar"); + sc = _scan_scalar_dquot(); + csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc); + m_evt_handler->set_val_scalar_dquoted(maybe_filtered); + addrem_flags(RNXT, RVAL); + } + // block scalars (ie | and >) cannot appear in flow containers + else if(_scan_scalar_plain_seq_flow(&sc)) + { + _c4dbgp("seqflow[RVAL]: it's a scalar."); + csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref); + m_evt_handler->set_val_scalar_plain(maybe_filtered); + addrem_flags(RNXT, RVAL); + } + else if(first == '[') + { + _c4dbgp("seqflow[RVAL]: start child seqflow"); + addrem_flags(RNXT, RVAL); + m_evt_handler->begin_seq_val_flow(); + _set_indentation(m_evt_handler->m_parent->indref); + addrem_flags(RVAL, RNXT); + _line_progressed(1); + } + else if(first == '{') + { + _c4dbgp("seqflow[RVAL]: start child mapflow"); + addrem_flags(RNXT, RVAL); + m_evt_handler->begin_map_val_flow(); + _set_indentation(m_evt_handler->m_parent->indref); + addrem_flags(RMAP|RKEY, RSEQ|RVAL|RNXT); + _line_progressed(1); + goto seqflow_finish; + } + else if(first == ']') // this happens on a trailing comma like ", ]" + { + _c4dbgp("seqflow[RVAL]: end!"); + _line_progressed(1); + m_evt_handler->end_seq(); + goto seqflow_finish; + } + else if(first == '*') + { + csubstr ref = _scan_ref_seq(); + _c4dbgpf("seqflow[RVAL]: ref! 
[{}]~~~{}~~~", ref.len, ref); + m_evt_handler->set_val_ref(ref); + addrem_flags(RNXT, RVAL); + } + else if(first == '&') + { + csubstr anchor = _scan_anchor(); + _c4dbgpf("seqflow[RVAL]: anchor! [{}]~~~{}~~~", anchor.len, anchor); + m_evt_handler->set_val_anchor(anchor); + if(_maybe_scan_following_comma()) + { + _c4dbgp("seqflow[RVAL]: empty scalar!"); + m_evt_handler->set_val_scalar_plain({}); + m_evt_handler->add_sibling(); + } + } + else if(first == '!') + { + csubstr tag = _scan_tag(); + _c4dbgpf("seqflow[RVAL]: tag! [{}]~~~{}~~~", tag.len, tag); + _check_tag(tag); + m_evt_handler->set_val_tag(tag); + if(_maybe_scan_following_comma()) + { + _c4dbgp("seqflow[RVAL]: empty scalar!"); + m_evt_handler->set_val_scalar_plain({}); + m_evt_handler->add_sibling(); + } + } + else if(first == ':') + { + _c4dbgpf("seqflow[RVAL]: actually seqimap at node[{}], with empty key", m_evt_handler->m_curr->node_id); + addrem_flags(RNXT, RVAL); + m_evt_handler->begin_map_val_flow(); + _set_indentation(m_evt_handler->m_parent->indref); + m_evt_handler->set_key_scalar_plain({}); + addrem_flags(RSEQIMAP|RVAL, RSEQ|RNXT); + _line_progressed(1); + goto seqflow_finish; + } + else if(first == '?') + { + _c4dbgp("seqflow[RVAL]: start child mapflow, explicit key"); + addrem_flags(RNXT, RVAL); + m_was_inside_qmrk = true; + m_evt_handler->begin_map_val_flow(); + _set_indentation(m_evt_handler->m_parent->indref); + addrem_flags(RSEQIMAP|QMRK, RSEQ|RNXT); + _line_progressed(1); + _maybe_skip_whitespace_tokens(); + goto seqflow_finish; + } + else + { + _c4err("parse error"); + } } - - _c4dbgpf("RUNK '[{}]~~~{}~~~", s.len, s); - if( ! 
_is_scalar_next__runk(s)) + else // RNXT { - _c4dbgp("RUNK: no scalar next"); - return false; + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RNXT)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL)); + const char first = rem.str[0]; + if(first == ',') + { + _c4dbgp("seqflow[RNXT]: expect next val"); + addrem_flags(RVAL, RNXT); + m_evt_handler->add_sibling(); + _line_progressed(1); + } + else if(first == ']') + { + _c4dbgp("seqflow[RNXT]: end!"); + m_evt_handler->end_seq(); + _line_progressed(1); + goto seqflow_finish; + } + else if(first == ':') + { + _c4dbgpf("seqflow[RNXT]: actually seqimap at node[{}]", m_evt_handler->m_curr->node_id); + m_evt_handler->actually_val_is_first_key_of_new_map_flow(); + _set_indentation(m_evt_handler->m_parent->indref); + _line_progressed(1); + addrem_flags(RSEQIMAP|RVAL, RNXT); + goto seqflow_finish; + } + else + { + _c4err("parse error"); + } } - size_t pos = s.find(" #"); - if(pos != npos) - s = s.left_of(pos); - pos = s.find(": "); - if(pos != npos) - s = s.left_of(pos); - else if(s.ends_with(':')) - s = s.left_of(s.len-1); - _RYML_WITH_TAB_TOKENS( - else if((pos = s.find(":\t")) != npos) // TABS - s = s.left_of(pos); - ) - else - s = s.left_of(s.first_of(',')); - s = s.trim(" \t"); - _c4dbgpf("RUNK: scalar='{}'", s); - - if(s.empty()) - return false; - - m_state->scalar_col = m_state->line_contents.current_col(s); - _RYML_CB_ASSERT(m_stack.m_callbacks, s.str >= m_state->line_contents.rem.str); - _line_progressed(static_cast(s.str - m_state->line_contents.rem.str) + s.len); - if(_at_line_end() && s != '~') + seqflow_again: + _c4dbgt("seqflow: go again", 0); + if(_finished_line()) { - _c4dbgpf("at line end. 
curr='{}'", s); - s = _extend_scanned_scalar(s); + if(C4_LIKELY(!_finished_file())) + { + _line_ended(); + _scan_line(); + _c4dbgnextline(); + } + else + { + _c4err("missing terminating ]"); + } } + goto seqflow_start; - _c4dbgpf("scalar was '{}'", s); - - *scalar = s; - *quoted = false; - return true; + seqflow_finish: + _c4dbgp("seqflow: finish"); } //----------------------------------------------------------------------------- -csubstr Parser::_extend_scanned_scalar(csubstr s) +template +void ParseEngine::_handle_map_flow() { - if(has_all(RMAP|RKEY|QMRK)) +mapflow_start: + _c4dbgpf("handle2_map_flow: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref); + + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RMAP)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(FLOW)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RKCL|RVAL|RNXT|QMRK)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RKEY) + has_any(RKCL) + has_any(RVAL) + has_any(RNXT) + has_any(QMRK))); + + _handle_flow_skip_whitespace(); + csubstr rem = m_evt_handler->m_curr->line_contents.rem; + if(!rem.len) + goto mapflow_again; + + if(has_any(RKEY)) { - size_t scalar_indentation = has_any(FLOW) ? 0 : m_state->scalar_col; - _c4dbgpf("extend_scalar: explicit key! 
indref={} scalar_indentation={} scalar_col={}", m_state->indref, scalar_indentation, m_state->scalar_col); - csubstr n = _scan_to_next_nonempty_line(scalar_indentation); - if(!n.empty()) + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK)); + const char first = rem.str[0]; + _c4dbgpf("mapflow[RKEY]: '{}'", first); + ScannedScalar sc; + if(first == '\'') { - substr full = _scan_complex_key(s, n).trimr(" \t\r\n"); - if(full != s) - s = _filter_plain_scalar(full, scalar_indentation); + _c4dbgp("mapflow[RKEY]: scanning single-quoted scalar"); + sc = _scan_scalar_squot(); + csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); + m_evt_handler->set_key_scalar_squoted(maybe_filtered); + addrem_flags(RKCL, RKEY|QMRK); } - } - // deal with plain (unquoted) scalars that continue to the next line - else if(!s.begins_with_any("*")) // cannot be a plain scalar if it starts with * (that's an anchor reference) - { - _c4dbgpf("extend_scalar: line ended, scalar='{}'", s); - if(has_none(FLOW)) + else if(first == '"') { - size_t scalar_indentation = m_state->indref + 1; - if(has_all(RUNK) && scalar_indentation == 1) - scalar_indentation = 0; - csubstr n = _scan_to_next_nonempty_line(scalar_indentation); - if(!n.empty()) - { - _c4dbgpf("rscalar[IMPL]: state_indref={} state_indentation={} scalar_indentation={}", m_state->indref, m_state->line_contents.indentation, scalar_indentation); - _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->line_contents.full.is_super(n)); - substr full = _scan_plain_scalar_blck(s, n, scalar_indentation); - if(full.len >= s.len) - s = _filter_plain_scalar(full, scalar_indentation); - } + _c4dbgp("mapflow[RKEY]: scanning double-quoted scalar"); + sc = _scan_scalar_dquot(); + csubstr maybe_filtered = 
_maybe_filter_key_scalar_dquot(sc); + m_evt_handler->set_key_scalar_dquoted(maybe_filtered); + addrem_flags(RKCL, RKEY|QMRK); + } + // block scalars (ie | and >) cannot appear in flow containers + else if(_scan_scalar_plain_map_flow(&sc)) + { + _c4dbgp("mapflow[RKEY]: plain scalar"); + csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); + m_evt_handler->set_key_scalar_plain(maybe_filtered); + addrem_flags(RKCL, RKEY|QMRK); + } + else if(first == '?') + { + _c4dbgp("mapflow[RKEY]: explicit key"); + _line_progressed(1); + addrem_flags(QMRK, RKEY); + _maybe_skip_whitespace_tokens(); + } + else if(first == ':') + { + _c4dbgp("mapflow[RKEY]: setting empty key"); + m_evt_handler->set_key_scalar_plain({}); + addrem_flags(RVAL, RKEY|QMRK); + _line_progressed(1); + _maybe_skip_whitespace_tokens(); + } + else if(first == '}') // this happens on a trailing comma like ", }" + { + _c4dbgp("mapflow[RKEY]: end!"); + m_evt_handler->end_map(); + _line_progressed(1); + goto mapflow_finish; + } + else if(first == '&') + { + csubstr anchor = _scan_anchor(); + _c4dbgpf("mapflow[RKEY]: key anchor! [{}]~~~{}~~~", anchor.len, anchor); + m_evt_handler->set_key_anchor(anchor); + } + else if(first == '*') + { + csubstr ref = _scan_ref_map(); + _c4dbgpf("mapflow[RKEY]: key ref! [{}]~~~{}~~~", ref.len, ref); + m_evt_handler->set_key_ref(ref); + addrem_flags(RKCL, RKEY); + } + else if(first == '[') + { + // RYML's tree cannot store container keys, but that's + // handled inside the tree sink. Other sink types may be + // able to handle it. + _c4dbgp("mapflow[RKEY]: start child seqflow (!)"); + addrem_flags(RKCL, RKEY); + m_evt_handler->begin_seq_key_flow(); + addrem_flags(RSEQ|RVAL, RMAP|RKCL); + _set_indentation(m_evt_handler->m_parent->indref); + _line_progressed(1); + goto mapflow_finish; + } + else if(first == '{') + { + // RYML's tree cannot store container keys, but that's + // handled inside the tree sink. 
Other sink types may be + // able to handle it. + _c4dbgp("mapflow[RKEY]: start child mapflow (!)"); + addrem_flags(RKCL, RKEY); + m_evt_handler->begin_map_key_flow(); + addrem_flags(RKEY, RVAL|RKCL); + _set_indentation(m_evt_handler->m_parent->indref); + _line_progressed(1); + // keep going in this function + } + else if(first == '!') + { + csubstr tag = _scan_tag(); + _c4dbgpf("mapflow[RKEY]: tag! [{}]~~~{}~~~", tag.len, tag); + _check_tag(tag); + m_evt_handler->set_key_tag(tag); } else { - _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(FLOW)); - csubstr n = _scan_to_next_nonempty_line(/*indentation*/0); - if(!n.empty()) - { - _c4dbgp("rscalar[FLOW]"); - substr full = _scan_plain_scalar_flow(s, n); - s = _filter_plain_scalar(full, /*indentation*/0); - } + _c4err("parse error"); } } - - return s; -} - - -//----------------------------------------------------------------------------- - -substr Parser::_scan_plain_scalar_flow(csubstr currscalar, csubstr peeked_line) -{ - static constexpr const csubstr chars = "[]{}?#,"; - size_t pos = peeked_line.first_of(chars); - bool first = true; - while(pos != 0) + else if(has_any(RKCL)) // read the key colon { - if(has_all(RMAP|RKEY) || has_any(RUNK)) + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK)); + const char first = rem.str[0]; + _c4dbgpf("mapflow[RKCL]: '{}'", first); + if(first == ':') { - csubstr tpkl = peeked_line.triml(' ').trimr("\r\n"); - if(tpkl.begins_with(": ") || tpkl == ':') - { - _c4dbgpf("rscalar[FLOW]: map value starts on the peeked line: '{}'", peeked_line); - peeked_line = peeked_line.first(0); - break; - } - else - { - auto colon_pos = peeked_line.first_of_any(": ", ":"); - if(colon_pos && colon_pos.pos < pos) - { - peeked_line = peeked_line.first(colon_pos.pos); - 
_c4dbgpf("rscalar[FLOW]: found colon at {}. peeked='{}'", colon_pos.pos, peeked_line); - _RYML_CB_ASSERT(m_stack.m_callbacks, peeked_line.end() >= m_state->line_contents.rem.begin()); - _line_progressed(static_cast(peeked_line.end() - m_state->line_contents.rem.begin())); - break; - } - } + _c4dbgp("mapflow[RKCL]: found the colon"); + addrem_flags(RVAL, RKCL); + _line_progressed(1); + } + else if(first == '}') + { + _c4dbgp("mapflow[RKCL]: end with missing val!"); + addrem_flags(RVAL, RKCL); + m_evt_handler->set_val_scalar_plain({}); + m_evt_handler->end_map(); + _line_progressed(1); + goto mapflow_finish; + } + else if(first == ',') + { + _c4dbgp("mapflow[RKCL]: got comma. val is missing"); + m_evt_handler->set_val_scalar_plain({}); + m_evt_handler->add_sibling(); + addrem_flags(RKEY, RKCL); + _line_progressed(1); + } + else + { + _c4err("parse error"); + } + } + else if(has_any(RVAL)) + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK)); + const char first = rem.str[0]; + _c4dbgpf("mapflow[RVAL]: '{}'", first); + ScannedScalar sc; + if(first == '\'') + { + _c4dbgp("mapflow[RVAL]: scanning single-quoted scalar"); + sc = _scan_scalar_squot(); + csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc); + m_evt_handler->set_val_scalar_squoted(maybe_filtered); + addrem_flags(RNXT, RVAL); } - if(pos != npos) + else if(first == '"') { - _c4dbgpf("rscalar[FLOW]: found special character '{}' at {}, stopping: '{}'", peeked_line[pos], pos, peeked_line.left_of(pos).trimr("\r\n")); - peeked_line = peeked_line.left_of(pos); - _RYML_CB_ASSERT(m_stack.m_callbacks, peeked_line.end() >= m_state->line_contents.rem.begin()); - _line_progressed(static_cast(peeked_line.end() - m_state->line_contents.rem.begin())); - break; + _c4dbgp("mapflow[RVAL]: 
scanning double-quoted scalar"); + sc = _scan_scalar_dquot(); + csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc); + m_evt_handler->set_val_scalar_dquoted(maybe_filtered); + addrem_flags(RNXT, RVAL); } - _c4dbgpf("rscalar[FLOW]: append another line, full: '{}'", peeked_line.trimr("\r\n")); - if(!first) + // block scalars (ie | and >) cannot appear in flow containers + else if(_scan_scalar_plain_map_flow(&sc)) { - RYML_CHECK(_advance_to_peeked()); + _c4dbgp("mapflow[RVAL]: plain scalar."); + csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref); + m_evt_handler->set_val_scalar_plain(maybe_filtered); + addrem_flags(RNXT, RVAL); } - peeked_line = _scan_to_next_nonempty_line(/*indentation*/0); - if(peeked_line.empty()) + else if(first == '[') { - _c4err("expected token or continuation"); + _c4dbgp("mapflow[RVAL]: start val seqflow"); + addrem_flags(RNXT, RVAL); + m_evt_handler->begin_seq_val_flow(); + _set_indentation(m_evt_handler->m_parent->indref); + addrem_flags(RSEQ|RVAL, RMAP|RNXT); + _line_progressed(1); + goto mapflow_finish; } - pos = peeked_line.first_of(chars); - first = false; - } - substr full(m_buf.str + (currscalar.str - m_buf.str), m_buf.begin() + m_state->pos.offset); - full = full.trimr("\n\r "); - return full; -} - - -//----------------------------------------------------------------------------- - -substr Parser::_scan_plain_scalar_blck(csubstr currscalar, csubstr peeked_line, size_t indentation) -{ - _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf.is_super(currscalar)); - // NOTE. 
there's a problem with _scan_to_next_nonempty_line(), as it counts newlines twice - // size_t offs = m_state->pos.offset; // so we workaround by directly counting from the end of the given scalar - _RYML_CB_ASSERT(m_stack.m_callbacks, currscalar.end() >= m_buf.begin()); - size_t offs = static_cast(currscalar.end() - m_buf.begin()); - _RYML_CB_ASSERT(m_stack.m_callbacks, peeked_line.begins_with(' ', indentation)); - while(true) - { - _c4dbgpf("rscalar[IMPL]: continuing... ref_indentation={}", indentation); - if(peeked_line.begins_with("...") || peeked_line.begins_with("---")) + else if(first == '{') { - _c4dbgpf("rscalar[IMPL]: document termination next -- bail now '{}'", peeked_line.trimr("\r\n")); - break; + _c4dbgp("mapflow[RVAL]: start val mapflow"); + addrem_flags(RNXT, RVAL); + m_evt_handler->begin_map_val_flow(); + _set_indentation(m_evt_handler->m_parent->indref); + addrem_flags(RKEY, RNXT); + _line_progressed(1); + // keep going in this function } - else if(( ! peeked_line.begins_with(' ', indentation))) // is the line deindented? + else if(first == '}') { - if(!peeked_line.trim(" \r\n\t").empty()) // is the line not blank? - { - _c4dbgpf("rscalar[IMPL]: deindented line, not blank -- bail now '{}'", peeked_line.trimr("\r\n")); - break; - } - _c4dbgpf("rscalar[IMPL]: line is blank and has less indentation: ref={} line={}: '{}'", indentation, peeked_line.first_not_of(' ') == csubstr::npos ? 0 : peeked_line.first_not_of(' '), peeked_line.trimr("\r\n")); - _c4dbgpf("rscalar[IMPL]: ... searching for a line starting at indentation {}", indentation); - csubstr next_peeked = _scan_to_next_nonempty_line(indentation); - if(next_peeked.empty()) - { - _c4dbgp("rscalar[IMPL]: ... finished."); - break; - } - _c4dbgp("rscalar[IMPL]: ... 
continuing."); - peeked_line = next_peeked; + _c4dbgp("mapflow[RVAL]: end!"); + m_evt_handler->set_val_scalar_plain({}); + m_evt_handler->end_map(); + _line_progressed(1); + goto mapflow_finish; } - - _c4dbgpf("rscalar[IMPL]: line contents: '{}'", peeked_line.right_of(indentation, true).trimr("\r\n")); - size_t token_pos; - if(peeked_line.find(": ") != npos) + else if(first == '*') { - _line_progressed(peeked_line.find(": ")); - _c4err("': ' is not a valid token in plain flow (unquoted) scalars"); + csubstr ref = _scan_ref_map(); + _c4dbgpf("mapflow[RVAL]: key ref! [{}]~~~{}~~~", ref.len, ref); + m_evt_handler->set_val_ref(ref); + addrem_flags(RNXT, RVAL); } - else if(peeked_line.ends_with(':')) + else if(first == '&') { - _line_progressed(peeked_line.find(':')); - _c4err("lines cannot end with ':' in plain flow (unquoted) scalars"); + csubstr anchor = _scan_anchor(); + _c4dbgpf("mapflow[RVAL]: key anchor! [{}]~~~{}~~~", anchor.len, anchor); + m_evt_handler->set_val_anchor(anchor); } - else if((token_pos = peeked_line.find(" #")) != npos) + else if(first == '!') { - _line_progressed(token_pos); - break; - //_c4err("' #' is not a valid token in plain flow (unquoted) scalars"); + csubstr tag = _scan_tag(); + _c4dbgpf("mapflow[RVAL]: tag! 
[{}]~~~{}~~~", tag.len, tag); + _check_tag(tag); + m_evt_handler->set_val_tag(tag); } - - _c4dbgpf("rscalar[IMPL]: append another line: (len={})'{}'", peeked_line.len, peeked_line.trimr("\r\n")); - if(!_advance_to_peeked()) + else { - _c4dbgp("rscalar[IMPL]: file finishes after the scalar"); - break; + _c4err("parse error"); } - peeked_line = m_state->line_contents.rem; } - _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.offset >= offs); - substr full(m_buf.str + (currscalar.str - m_buf.str), - currscalar.len + (m_state->pos.offset - offs)); - full = full.trimr("\r\n "); - return full; -} - -substr Parser::_scan_complex_key(csubstr currscalar, csubstr peeked_line) -{ - _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf.is_super(currscalar)); - // NOTE. there's a problem with _scan_to_next_nonempty_line(), as it counts newlines twice - // size_t offs = m_state->pos.offset; // so we workaround by directly counting from the end of the given scalar - _RYML_CB_ASSERT(m_stack.m_callbacks, currscalar.end() >= m_buf.begin()); - size_t offs = static_cast(currscalar.end() - m_buf.begin()); - while(true) + else if(has_any(RNXT)) { - _c4dbgp("rcplxkey: continuing..."); - if(peeked_line.begins_with("...") || peeked_line.begins_with("---")) + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK)); + _c4dbgpf("mapflow[RNXT]: '{}'", rem.str[0]); + if(rem.begins_with(',')) { - _c4dbgpf("rcplxkey: document termination next -- bail now '{}'", peeked_line.trimr("\r\n")); - break; + _c4dbgp("mapflow[RNXT]: expect next keyval"); + m_evt_handler->add_sibling(); + addrem_flags(RKEY, RNXT); + _line_progressed(1); + } + else if(rem.begins_with('}')) + { + _c4dbgp("mapflow[RNXT]: end!"); + m_evt_handler->end_map(); + _line_progressed(1); + goto mapflow_finish; } else { - 
size_t pos = peeked_line.first_of("?:[]{}"); - if(pos == csubstr::npos) - { - pos = peeked_line.find("- "); - } - if(pos != csubstr::npos) - { - _c4dbgpf("rcplxkey: found special characters at pos={}: '{}'", pos, peeked_line.trimr("\r\n")); - _line_progressed(pos); - break; - } + _c4err("parse error"); } - - _c4dbgpf("rcplxkey: no special chars found '{}'", peeked_line.trimr("\r\n")); - csubstr next_peeked = _scan_to_next_nonempty_line(0); - if(next_peeked.empty()) + } + else if(has_any(QMRK)) + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT)); + const char first = rem.str[0]; + _c4dbgpf("mapflow[QMRK]: '{}'", first); + ScannedScalar sc; + if(first == '\'') { - _c4dbgp("rcplxkey: empty ... finished."); - break; + _c4dbgp("mapflow[QMRK]: scanning single-quoted scalar"); + sc = _scan_scalar_squot(); + csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); + m_evt_handler->set_key_scalar_squoted(maybe_filtered); + addrem_flags(RKCL, QMRK); } - _c4dbgp("rcplxkey: ... 
continuing."); - peeked_line = next_peeked; - - _c4dbgpf("rcplxkey: line contents: '{}'", peeked_line.trimr("\r\n")); - size_t colpos; - if((colpos = peeked_line.find(": ")) != npos) + else if(first == '"') { - _c4dbgp("rcplxkey: found ': ', stopping."); - _line_progressed(colpos); - break; + _c4dbgp("mapflow[QMRK]: scanning double-quoted scalar"); + sc = _scan_scalar_dquot(); + csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); + m_evt_handler->set_key_scalar_dquoted(maybe_filtered); + addrem_flags(RKCL, QMRK); } - #ifdef RYML_NO_COVERAGE__TO_BE_DELETED - else if((colpos = peeked_line.ends_with(':'))) + // block scalars (ie | and >) cannot appear in flow containers + else if(_scan_scalar_plain_map_flow(&sc)) { - _c4dbgp("rcplxkey: ends with ':', stopping."); - _line_progressed(colpos); - break; + _c4dbgp("mapflow[QMRK]: plain scalar"); + csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); + m_evt_handler->set_key_scalar_plain(maybe_filtered); + addrem_flags(RKCL, QMRK); } - #endif - _c4dbgpf("rcplxkey: append another line: (len={})'{}'", peeked_line.len, peeked_line.trimr("\r\n")); - if(!_advance_to_peeked()) + else if(first == ':') { - _c4dbgp("rcplxkey: file finishes after the scalar"); - break; + _c4dbgp("mapflow[QMRK]: setting empty key"); + m_evt_handler->set_key_scalar_plain({}); + addrem_flags(RVAL, QMRK); + _line_progressed(1); + _maybe_skip_whitespace_tokens(); } - peeked_line = m_state->line_contents.rem; - } - _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.offset >= offs); - substr full(m_buf.str + (currscalar.str - m_buf.str), - currscalar.len + (m_state->pos.offset - offs)); - return full; -} - -//! scans to the next non-blank line starting with the given indentation -csubstr Parser::_scan_to_next_nonempty_line(size_t indentation) -{ - csubstr next_peeked; - while(true) - { - _c4dbgpf("rscalar: ... 
curr offset: {} indentation={}", m_state->pos.offset, indentation); - next_peeked = _peek_next_line(m_state->pos.offset); - csubstr next_peeked_triml = next_peeked.triml(' '); - _c4dbgpf("rscalar: ... next peeked line='{}'", next_peeked.trimr("\r\n")); - if(next_peeked_triml.begins_with('#')) + else if(first == '}') // this happens on a trailing comma like ", }" { - _c4dbgp("rscalar: ... first non-space character is #"); - return {}; + _c4dbgp("mapflow[QMRK]: end!"); + m_evt_handler->set_key_scalar_plain({}); + m_evt_handler->set_val_scalar_plain({}); + m_evt_handler->end_map(); + _line_progressed(1); + goto mapflow_finish; } - else if(next_peeked.begins_with(' ', indentation)) + else if(first == '&') { - _c4dbgpf("rscalar: ... begins at same indentation {}, assuming continuation", indentation); - _advance_to_peeked(); - return next_peeked; + csubstr anchor = _scan_anchor(); + _c4dbgpf("mapflow[QMRK]: key anchor! [{}]~~~{}~~~", anchor.len, anchor); + m_evt_handler->set_key_anchor(anchor); } - else // check for de-indentation + else if(first == '*') { - csubstr trimmed = next_peeked_triml.trimr("\t\r\n"); - _c4dbgpf("rscalar: ... deindented! trimmed='{}'", trimmed); - if(!trimmed.empty()) - { - _c4dbgp("rscalar: ... and not empty. bailing out."); - return {}; - } + csubstr ref = _scan_ref_map(); + _c4dbgpf("mapflow[QMRK]: key ref! [{}]~~~{}~~~", ref.len, ref); + m_evt_handler->set_key_ref(ref); + addrem_flags(RKCL, QMRK); } - if(!_advance_to_peeked()) + else if(first == '[') { - _c4dbgp("rscalar: file finished"); - return {}; + // RYML's tree cannot store container keys, but that's + // handled inside the tree sink. Other sink types may be + // able to handle it. 
+ _c4dbgp("mapflow[QMRK]: start child seqflow (!)"); + addrem_flags(RKCL, QMRK); + m_evt_handler->begin_seq_key_flow(); + addrem_flags(RSEQ|RVAL, RMAP|RKCL); + _set_indentation(m_evt_handler->m_parent->indref); + _line_progressed(1); + goto mapflow_finish; + } + else if(first == '{') + { + // RYML's tree cannot store container keys, but that's + // handled inside the tree sink. Other sink types may be + // able to handle it. + _c4dbgp("mapflow[QMRK]: start child mapflow (!)"); + addrem_flags(RKCL, QMRK); + m_evt_handler->begin_map_key_flow(); + _set_indentation(m_evt_handler->m_parent->indref); + addrem_flags(RKEY, RKCL); + _line_progressed(1); + // keep going in this function + } + else if(first == '!') + { + csubstr tag = _scan_tag(); + _c4dbgpf("mapflow[QMRK]: tag! [{}]~~~{}~~~", tag.len, tag); + _check_tag(tag); + m_evt_handler->set_key_tag(tag); + } + else + { + _c4err("parse error"); } } - return {}; -} - -// returns false when the file finished -bool Parser::_advance_to_peeked() -{ - _line_progressed(m_state->line_contents.rem.len); - _line_ended(); // advances to the peeked-at line, consuming all remaining (probably newline) characters on the current line - _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->line_contents.rem.first_of("\r\n") == csubstr::npos); - _c4dbgpf("advance to peeked: scan more... pos={} len={}", m_state->pos.offset, m_buf.len); - _scan_line(); // puts the peeked-at line in the buffer - if(_finished_file()) - { - _c4dbgp("rscalar: finished file!"); - return false; - } - return true; -} - -//----------------------------------------------------------------------------- - -C4_ALWAYS_INLINE size_t _extend_from_combined_newline(char nl, char following) -{ - return (nl == '\n' && following == '\r') || (nl == '\r' && following == '\n'); -} - -//! 
look for the next newline chars, and jump to the right of those -csubstr from_next_line(csubstr rem) -{ - size_t nlpos = rem.first_of("\r\n"); - if(nlpos == csubstr::npos) - return {}; - const char nl = rem[nlpos]; - rem = rem.right_of(nlpos); - if(rem.empty()) - return {}; - if(_extend_from_combined_newline(nl, rem.front())) - rem = rem.sub(1); - return rem; -} - -csubstr Parser::_peek_next_line(size_t pos) const -{ - csubstr rem{}; // declare here because of the goto - size_t nlpos{}; // declare here because of the goto - pos = pos == npos ? m_state->pos.offset : pos; - if(pos >= m_buf.len) - goto next_is_empty; - - // look for the next newline chars, and jump to the right of those - rem = from_next_line(m_buf.sub(pos)); - if(rem.empty()) - goto next_is_empty; - - // now get everything up to and including the following newline chars - nlpos = rem.first_of("\r\n"); - if((nlpos != csubstr::npos) && (nlpos + 1 < rem.len)) - nlpos += _extend_from_combined_newline(rem[nlpos], rem[nlpos+1]); - rem = rem.left_of(nlpos, /*include_pos*/true); - - _c4dbgpf("peek next line @ {}: (len={})'{}'", pos, rem.len, rem.trimr("\r\n")); - return rem; - -next_is_empty: - _c4dbgpf("peek next line @ {}: (len=0)''", pos); - return {}; -} - - -//----------------------------------------------------------------------------- -void Parser::LineContents::reset_with_next_line(csubstr buf, size_t offset) -{ - RYML_ASSERT(offset <= buf.len); - char const* C4_RESTRICT b = &buf[offset]; - char const* C4_RESTRICT e = b; - // get the current line stripped of newline chars - while(e < buf.end() && (*e != '\n' && *e != '\r')) - ++e; - RYML_ASSERT(e >= b); - const csubstr stripped_ = buf.sub(offset, static_cast(e - b)); - // advance pos to include the first line ending - if(e != buf.end() && *e == '\r') - ++e; - if(e != buf.end() && *e == '\n') - ++e; - RYML_ASSERT(e >= b); - const csubstr full_ = buf.sub(offset, static_cast(e - b)); - reset(full_, stripped_); -} -void Parser::_scan_line() -{ - 
if(m_state->pos.offset >= m_buf.len) + mapflow_again: + _c4dbgt("mapflow: go again", 0); + if(_finished_line()) { - m_state->line_contents.reset(m_buf.last(0), m_buf.last(0)); - return; + if(C4_LIKELY(!_finished_file())) + { + _line_ended(); + _scan_line(); + _c4dbgnextline(); + } + else + { + _c4err("missing terminating }"); + } } - m_state->line_contents.reset_with_next_line(m_buf, m_state->pos.offset); -} - + goto mapflow_start; -//----------------------------------------------------------------------------- -void Parser::_line_progressed(size_t ahead) -{ - _c4dbgpf("line[{}] ({} cols) progressed by {}: col {}-->{} offset {}-->{}", m_state->pos.line, m_state->line_contents.full.len, ahead, m_state->pos.col, m_state->pos.col+ahead, m_state->pos.offset, m_state->pos.offset+ahead); - m_state->pos.offset += ahead; - m_state->pos.col += ahead; - _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.col <= m_state->line_contents.stripped.len+1); - m_state->line_contents.rem = m_state->line_contents.rem.sub(ahead); -} - -void Parser::_line_ended() -{ - _c4dbgpf("line[{}] ({} cols) ended! offset {}-->{}", m_state->pos.line, m_state->line_contents.full.len, m_state->pos.offset, m_state->pos.offset+m_state->line_contents.full.len - m_state->line_contents.stripped.len); - _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.col == m_state->line_contents.stripped.len+1); - m_state->pos.offset += m_state->line_contents.full.len - m_state->line_contents.stripped.len; - ++m_state->pos.line; - m_state->pos.col = 1; -} - -void Parser::_line_ended_undo() -{ - _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.col == 1u); - _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.line > 0u); - _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.offset >= m_state->line_contents.full.len - m_state->line_contents.stripped.len); - size_t delta = m_state->line_contents.full.len - m_state->line_contents.stripped.len; - _c4dbgpf("line[{}] undo ended! 
line {}-->{}, offset {}-->{}", m_state->pos.line, m_state->pos.line, m_state->pos.line - 1, m_state->pos.offset, m_state->pos.offset - delta); - m_state->pos.offset -= delta; - --m_state->pos.line; - m_state->pos.col = m_state->line_contents.stripped.len + 1u; - // don't forget to undo also the changes to the remainder of the line - _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.offset >= m_buf.len || m_buf[m_state->pos.offset] == '\n' || m_buf[m_state->pos.offset] == '\r'); - m_state->line_contents.rem = m_buf.sub(m_state->pos.offset, 0); + mapflow_finish: + _c4dbgp("mapflow: finish"); } //----------------------------------------------------------------------------- -void Parser::_set_indentation(size_t indentation) -{ - m_state->indref = indentation; - _c4dbgpf("state[{}]: saving indentation: {}", m_state-m_stack.begin(), m_state->indref); -} -void Parser::_save_indentation(size_t behind) +template +void ParseEngine::_handle_seq_block() { - _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->line_contents.rem.begin() >= m_state->line_contents.full.begin()); - m_state->indref = static_cast(m_state->line_contents.rem.begin() - m_state->line_contents.full.begin()); - _RYML_CB_ASSERT(m_stack.m_callbacks, behind <= m_state->indref); - m_state->indref -= behind; - _c4dbgpf("state[{}]: saving indentation: {}", m_state-m_stack.begin(), m_state->indref); -} +seqblck_start: + _c4dbgpf("handle2_seq_block: seq_id={} node_id={} level={} indent={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref); -bool Parser::_maybe_set_indentation_from_anchor_or_tag() -{ - if(m_key_anchor.not_empty()) - { - _c4dbgpf("set indentation from key anchor: {}", m_key_anchor_indentation); - _set_indentation(m_key_anchor_indentation); // this is the column where the anchor starts - return true; - } - else if(m_key_tag.not_empty()) - { - _c4dbgpf("set indentation from key tag: {}", m_key_tag_indentation); - 
_set_indentation(m_key_tag_indentation); // this is the column where the tag starts - return true; - } - return false; -} + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RSEQ)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(BLCK)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RVAL) + has_any(RNXT))); + _maybe_skip_comment(); + csubstr rem = m_evt_handler->m_curr->line_contents.rem; + if(!rem.len) + goto seqblck_again; -//----------------------------------------------------------------------------- -void Parser::_write_key_anchor(size_t node_id) -{ - _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->has_key(node_id)); - if( ! m_key_anchor.empty()) - { - _c4dbgpf("node={}: set key anchor to '{}'", node_id, m_key_anchor); - m_tree->set_key_anchor(node_id, m_key_anchor); - m_key_anchor.clear(); - m_key_anchor_was_before = false; - m_key_anchor_indentation = 0; - } - else if( ! 
m_tree->is_key_quoted(node_id)) + if(has_any(RVAL)) { - csubstr r = m_tree->key(node_id); - if(r.begins_with('*')) + _c4dbgpf("seqblck[RVAL]: col={}", m_evt_handler->m_curr->pos.col); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT)); + if(m_evt_handler->m_curr->at_line_beginning()) { - _c4dbgpf("node={}: set key reference: '{}'", node_id, r); - m_tree->set_key_ref(node_id, r.sub(1)); + _c4dbgpf("seqblck[RVAL]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation); + if(m_evt_handler->m_curr->indentation_ge()) + { + _c4dbgpf("seqblck[RVAL]: skip {} from indentation", m_evt_handler->m_curr->line_contents.indentation); + _line_progressed(m_evt_handler->m_curr->line_contents.indentation); + rem = m_evt_handler->m_curr->line_contents.rem; + if(!rem.len) + goto seqblck_again; + } + else if(m_evt_handler->m_curr->indentation_lt()) + { + _c4dbgp("seqblck[RVAL]: smaller indentation!"); + _handle_indentation_pop_from_block_seq(); + goto seqblck_finish; + } + else if(m_evt_handler->m_curr->line_contents.indentation == npos) + { + _c4dbgp("seqblck[RVAL]: empty line!"); + _line_progressed(m_evt_handler->m_curr->line_contents.rem.len); + goto seqblck_again; + } } - else if(r == "<<") + #ifdef RYML_NO_COVERAGE__TO_BE_DELETED + else { - m_tree->set_key_ref(node_id, r); - _c4dbgpf("node={}: it's an inheriting reference", node_id); - if(m_tree->is_seq(node_id)) + // accomodate annotation on the previous line. eg: + // - &elm + // foo # <-- on this line + // - &elm + // &foo foo: bar # <-- on this line + if(rem.str[0] == ' ') { - _c4dbgpf("node={}: inheriting from seq of {}", node_id, m_tree->num_children(node_id)); - for(size_t i = m_tree->first_child(node_id); i != NONE; i = m_tree->next_sibling(i)) + if(_handle_indentation_from_annotations()) { - if( ! 
(m_tree->val(i).begins_with('*'))) - _c4err("malformed reference: '{}'", m_tree->val(i)); + _c4dbgp("seqblck[RVAL]: annotations!"); + rem = m_evt_handler->m_curr->line_contents.rem; + if(!rem.len) + goto seqblck_again; } } - else if( ! m_tree->val(node_id).begins_with('*')) + } + #endif + _RYML_CB_ASSERT(callbacks(), rem.len); + _c4dbgpf("seqblck[RVAL]: '{}' node_id={}", rem.str[0], m_evt_handler->m_curr->node_id); + const char first = rem.str[0]; + const size_t startline = m_evt_handler->m_curr->pos.line; + // warning: the gcc optimizer on x86 builds is brittle with + // this function: + const size_t startindent = m_evt_handler->m_curr->line_contents.current_col(); + ScannedScalar sc; + if(first == '\'') + { + _c4dbgp("seqblck[RVAL]: single-quoted scalar"); + sc = _scan_scalar_squot(); + if(!_maybe_scan_following_colon()) + { + _c4dbgp("seqblck[RVAL]: set as val"); + _handle_annotations_before_blck_val_scalar(); + csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc); // VAL! + m_evt_handler->set_val_scalar_squoted(maybe_filtered); + addrem_flags(RNXT, RVAL); + } + else { - _c4err("malformed reference: '{}'", m_tree->val(node_id)); + _c4dbgp("seqblck[RVAL]: start mapblck, set scalar as key"); + addrem_flags(RNXT, RVAL); + _handle_annotations_before_start_mapblck(startline); + m_evt_handler->begin_map_val_block(); + _handle_annotations_and_indentation_after_start_mapblck(startindent, startline); + csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); // KEY! + m_evt_handler->set_key_scalar_squoted(maybe_filtered); + addrem_flags(RMAP|RVAL, RSEQ|RNXT); + _maybe_skip_whitespace_tokens(); + goto seqblck_finish; + } + } + else if(first == '"') + { + _c4dbgp("seqblck[RVAL]: double-quoted scalar"); + sc = _scan_scalar_dquot(); + if(!_maybe_scan_following_colon()) + { + _c4dbgp("seqblck[RVAL]: set as val"); + _handle_annotations_before_blck_val_scalar(); + csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc); // VAL! 
+ m_evt_handler->set_val_scalar_dquoted(maybe_filtered); + addrem_flags(RNXT, RVAL); } - //m_tree->set_key_ref(node_id, r); - } - } -} - -//----------------------------------------------------------------------------- -void Parser::_write_val_anchor(size_t node_id) -{ - if( ! m_val_anchor.empty()) - { - _c4dbgpf("node={}: set val anchor to '{}'", node_id, m_val_anchor); - m_tree->set_val_anchor(node_id, m_val_anchor); - m_val_anchor.clear(); - } - csubstr r = m_tree->has_val(node_id) ? m_tree->val(node_id) : ""; - if(!m_tree->is_val_quoted(node_id) && r.begins_with('*')) - { - _c4dbgpf("node={}: set val reference: '{}'", node_id, r); - RYML_CHECK(!m_tree->has_val_anchor(node_id)); - m_tree->set_val_ref(node_id, r.sub(1)); - } -} - -//----------------------------------------------------------------------------- -void Parser::_push_level(bool explicit_flow_chars) -{ - _c4dbgpf("pushing level! currnode={} currlevel={} stacksize={} stackcap={}", m_state->node_id, m_state->level, m_stack.size(), m_stack.capacity()); - _RYML_CB_ASSERT(m_stack.m_callbacks, m_state == &m_stack.top()); - if(node(m_state) == nullptr) - { - _c4dbgp("pushing level! actually no, current node is null"); - //_RYML_CB_ASSERT(m_stack.m_callbacks, ! explicit_flow_chars); - return; - } - flag_t st = RUNK; - if(explicit_flow_chars || has_all(FLOW)) - { - st |= FLOW; - } - m_stack.push_top(); - m_state = &m_stack.top(); - set_flags(st); - m_state->node_id = (size_t)NONE; - m_state->indref = (size_t)NONE; - ++m_state->level; - _c4dbgpf("pushing level: now, currlevel={}", m_state->level); -} - -void Parser::_pop_level() -{ - _c4dbgpf("popping level! 
currnode={} currlevel={}", m_state->node_id, m_state->level); - if(has_any(RMAP) || m_tree->is_map(m_state->node_id)) - { - _stop_map(); - } - if(has_any(RSEQ) || m_tree->is_seq(m_state->node_id)) - { - _stop_seq(); - } - if(m_tree->is_doc(m_state->node_id)) - { - _stop_doc(); - } - _RYML_CB_ASSERT(m_stack.m_callbacks, m_stack.size() > 1); - _prepare_pop(); - m_stack.pop(); - m_state = &m_stack.top(); - /*if(has_any(RMAP)) - { - _toggle_key_val(); - }*/ - if(m_state->line_contents.indentation == 0) - { - //_RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RTOP)); - add_flags(RTOP); - } - _c4dbgpf("popping level: now, currnode={} currlevel={}", m_state->node_id, m_state->level); -} - -//----------------------------------------------------------------------------- -void Parser::_start_unk(bool /*as_child*/) -{ - _c4dbgp("start_unk"); - _push_level(); - _move_scalar_from_top(); -} - -//----------------------------------------------------------------------------- -void Parser::_start_doc(bool as_child) -{ - _c4dbgpf("start_doc (as child={})", as_child); - _RYML_CB_ASSERT(m_stack.m_callbacks, node(m_stack.bottom()) == node(m_root_id)); - size_t parent_id = m_stack.size() < 2 ? m_root_id : m_stack.top(1).node_id; - _RYML_CB_ASSERT(m_stack.m_callbacks, parent_id != NONE); - _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->is_root(parent_id)); - _RYML_CB_ASSERT(m_stack.m_callbacks, node(m_state) == nullptr || node(m_state) == node(m_root_id)); - if(as_child) - { - _c4dbgpf("start_doc: parent={}", parent_id); - if( ! m_tree->is_stream(parent_id)) - { - _c4dbgp("start_doc: rearranging with root as STREAM"); - m_tree->set_root_as_stream(); - } - m_state->node_id = m_tree->append_child(parent_id); - m_tree->to_doc(m_state->node_id); - } - #ifdef RYML_NO_COVERAGE__TO_BE_DELETED - else - { - _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->is_seq(parent_id) || m_tree->empty(parent_id)); - m_state->node_id = parent_id; - if( ! 
m_tree->is_doc(parent_id)) - { - m_tree->to_doc(parent_id, DOC); + else + { + _c4dbgp("seqblck[RVAL]: start mapblck, set scalar as key"); + addrem_flags(RNXT, RVAL); + _handle_annotations_before_start_mapblck(startline); + m_evt_handler->begin_map_val_block(); + _handle_annotations_and_indentation_after_start_mapblck(startindent, startline); + csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); // KEY! + m_evt_handler->set_key_scalar_dquoted(maybe_filtered); + addrem_flags(RMAP|RVAL, RSEQ|RNXT); + _maybe_skip_whitespace_tokens(); + goto seqblck_finish; + } + } + // block scalars can only appear as keys when in QMRK scope + // (ie, after ? tokens), so no need to scan following colon in + // here. + else if(first == '|') + { + _c4dbgp("seqblck[RVAL]: block-literal scalar"); + ScannedBlock sb; + _scan_block(&sb, m_evt_handler->m_curr->indref + 1); + _handle_annotations_before_blck_val_scalar(); + csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb); + m_evt_handler->set_val_scalar_literal(maybe_filtered); + addrem_flags(RNXT, RVAL); } - } - #endif - _c4dbgpf("start_doc: id={}", m_state->node_id); - add_flags(RUNK|RTOP|NDOC); - _handle_types(); - rem_flags(NDOC); -} - -void Parser::_stop_doc() -{ - size_t doc_node = m_state->node_id; - _c4dbgpf("stop_doc[{}]", doc_node); - _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->is_doc(doc_node)); - if(!m_tree->is_seq(doc_node) && !m_tree->is_map(doc_node) && !m_tree->is_val(doc_node)) - { - _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(SSCL)); - _c4dbgpf("stop_doc[{}]: there was nothing; adding null val", doc_node); - m_tree->to_val(doc_node, {}, DOC); - } -} - -void Parser::_end_stream() -{ - _c4dbgpf("end_stream, level={} node_id={}", m_state->level, m_state->node_id); - _RYML_CB_ASSERT(m_stack.m_callbacks, ! 
m_stack.empty()); - NodeData *added = nullptr; - if(has_any(SSCL)) - { - if(m_tree->is_seq(m_state->node_id)) + else if(first == '>') { - _c4dbgp("append val..."); - added = _append_val(_consume_scalar()); + _c4dbgp("seqblck[RVAL]: block-folded scalar"); + ScannedBlock sb; + _scan_block(&sb, m_evt_handler->m_curr->indref + 1); + _handle_annotations_before_blck_val_scalar(); + csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb); + m_evt_handler->set_val_scalar_folded(maybe_filtered); + addrem_flags(RNXT, RVAL); } - else if(m_tree->is_map(m_state->node_id)) + else if(_scan_scalar_plain_seq_blck(&sc)) { - _c4dbgp("append null key val..."); - added = _append_key_val_null(m_state->line_contents.rem.str); - #ifdef RYML_NO_COVERAGE__TO_BE_DELETED - if(has_any(RSEQIMAP)) + _c4dbgp("seqblck[RVAL]: plain scalar."); + if(!_maybe_scan_following_colon()) { - _stop_seqimap(); - _pop_level(); + _c4dbgp("seqblck[RVAL]: set as val"); + _handle_annotations_before_blck_val_scalar(); + csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref); // VAL! + m_evt_handler->set_val_scalar_plain(maybe_filtered); + addrem_flags(RNXT, RVAL); + } + else + { + if(startindent > m_evt_handler->m_curr->indref) + { + _c4dbgp("seqblck[RVAL]: start mapblck, set scalar as key"); + addrem_flags(RNXT, RVAL); + _handle_annotations_before_start_mapblck(startline); + m_evt_handler->begin_map_val_block(); + _handle_annotations_and_indentation_after_start_mapblck(startindent, startline); + csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY! 
+ m_evt_handler->set_key_scalar_plain(maybe_filtered); + addrem_flags(RMAP|RVAL, RSEQ|RNXT); + _maybe_skip_whitespace_tokens(); + goto seqblck_finish; + } + else if(m_evt_handler->m_parent && m_evt_handler->m_parent->indref == startindent && has_any(RMAP|BLCK, m_evt_handler->m_parent)) + { + _c4dbgp("seqblck[RVAL]: empty val + end indentless seq + set key"); + m_evt_handler->set_val_scalar_plain({}); + m_evt_handler->end_seq(); + m_evt_handler->add_sibling(); + csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY! + m_evt_handler->set_key_scalar_plain(maybe_filtered); + addrem_flags(RVAL, RNXT|RKEY); + _maybe_skip_whitespace_tokens(); + goto seqblck_finish; + } + else + { + _c4err("parse error"); + } } - #endif } - else if(m_tree->is_doc(m_state->node_id) || m_tree->type(m_state->node_id) == NOTYPE) + else if(first == '[') { - NodeType_e quoted = has_any(QSCL) ? VALQUO : NOTYPE; // do this before consuming the scalar - csubstr scalar = _consume_scalar(); - _c4dbgpf("node[{}]: to docval '{}'{}", m_state->node_id, scalar, quoted == VALQUO ? 
", quoted" : ""); - m_tree->to_val(m_state->node_id, scalar, DOC|quoted); - added = m_tree->get(m_state->node_id); + _c4dbgp("seqblck[RVAL]: start child seqflow"); + addrem_flags(RNXT, RVAL); + m_evt_handler->begin_seq_val_flow(); + addrem_flags(FLOW|RVAL, BLCK|RNXT); + _line_progressed(1); + _set_indentation(m_evt_handler->m_parent->indref + 1u); + goto seqblck_finish; } - else + else if(first == '{') { - _c4err("internal error"); + _c4dbgp("seqblck[RVAL]: start child mapflow"); + addrem_flags(RNXT, RVAL); + _handle_annotations_before_blck_val_scalar(); + m_evt_handler->begin_map_val_flow(); + addrem_flags(RMAP|RKEY|FLOW, BLCK|RSEQ|RVAL|RNXT); + _line_progressed(1); + _set_indentation(m_evt_handler->m_parent->indref + 1u); + goto seqblck_finish; } - } - else if(has_all(RSEQ|RVAL) && has_none(FLOW)) - { - _c4dbgp("add last..."); - added = _append_val_null(m_state->line_contents.rem.str); - } - else if(!m_val_tag.empty() && (m_tree->is_doc(m_state->node_id) || m_tree->type(m_state->node_id) == NOTYPE)) - { - csubstr scalar = m_state->line_contents.rem.first(0); - _c4dbgpf("node[{}]: add null scalar as docval", m_state->node_id); - m_tree->to_val(m_state->node_id, scalar, DOC); - added = m_tree->get(m_state->node_id); - } - - if(added) - { - size_t added_id = m_tree->id(added); - if(m_tree->is_seq(m_state->node_id) || m_tree->is_doc(m_state->node_id)) + else if(first == '-') { - if(!m_key_anchor.empty()) + if(startindent == m_evt_handler->m_curr->indref) { - _c4dbgpf("node[{}]: move key to val anchor: '{}'", added_id, m_key_anchor); - m_val_anchor = m_key_anchor; - m_key_anchor = {}; + _c4dbgp("seqblck[RVAL]: prev val was empty"); + _handle_annotations_before_blck_val_scalar(); + m_evt_handler->set_val_scalar_plain({}); + // keep in RVAL, but for the next sibling + m_evt_handler->add_sibling(); } - if(!m_key_tag.empty()) + else { - _c4dbgpf("node[{}]: move key to val tag: '{}'", added_id, m_key_tag); - m_val_tag = m_key_tag; - m_key_tag = {}; + 
_c4dbgp("seqblck[RVAL]: start child seqblck"); + _RYML_CB_ASSERT(this->callbacks(), startindent > m_evt_handler->m_curr->indref); + addrem_flags(RNXT, RVAL); + _handle_annotations_before_blck_val_scalar(); + m_evt_handler->begin_seq_val_block(); + addrem_flags(RVAL, RNXT); + _save_indentation(); + // keep going on inside this function } + _line_progressed(1); + _maybe_skip_whitespace_tokens(); } - #ifdef RYML_NO_COVERAGE__TO_BE_DELETED - if(!m_key_anchor.empty()) + else if(first == ':') { - _c4dbgpf("node[{}]: set key anchor='{}'", added_id, m_key_anchor); - m_tree->set_key_anchor(added_id, m_key_anchor); - m_key_anchor = {}; + _c4dbgp("seqblck[RVAL]: start child mapblck with empty key"); + addrem_flags(RNXT, RVAL); + _handle_annotations_before_start_mapblck(startline); + m_evt_handler->begin_map_val_block(); + _handle_annotations_and_indentation_after_start_mapblck(startindent, startline); + m_evt_handler->set_key_scalar_plain({}); + addrem_flags(RMAP|RVAL, RSEQ|RNXT); + _line_progressed(1); + _maybe_skip_whitespace_tokens(); + goto seqblck_finish; } - #endif - if(!m_val_anchor.empty()) + else if(first == '&') { - _c4dbgpf("node[{}]: set val anchor='{}'", added_id, m_val_anchor); - m_tree->set_val_anchor(added_id, m_val_anchor); - m_val_anchor = {}; + const csubstr anchor = _scan_anchor(); + _c4dbgpf("seqblck[RVAL]: anchor! [{}]~~~{}~~~", anchor.len, anchor); + // we need to buffer the anchors, as there may be two + // consecutive anchors in here + _add_annotation(&m_pending_anchors, anchor, startindent, startline); } - #ifdef RYML_NO_COVERAGE__TO_BE_DELETED - if(!m_key_tag.empty()) + else if(first == '*') { - _c4dbgpf("node[{}]: set key tag='{}' -> '{}'", added_id, m_key_tag, normalize_tag(m_key_tag)); - m_tree->set_key_tag(added_id, normalize_tag(m_key_tag)); - m_key_tag = {}; + csubstr ref = _scan_ref_seq(); + _c4dbgpf("seqblck[RVAL]: ref! 
[{}]~~~{}~~~", ref.len, ref); + if(!_maybe_scan_following_colon()) + { + _c4dbgp("seqblck[RVAL]: set ref as val!"); + _handle_annotations_before_blck_val_scalar(); + m_evt_handler->set_val_ref(ref); + addrem_flags(RNXT, RVAL); + } + else + { + _c4dbgp("seqblck[RVAL]: ref is key of map"); + addrem_flags(RNXT, RVAL); + _handle_annotations_before_start_mapblck(startline); + m_evt_handler->begin_map_val_block(); + _handle_annotations_and_indentation_after_start_mapblck(startindent, startline); + m_evt_handler->set_key_ref(ref); + addrem_flags(RMAP|RVAL, RSEQ|RNXT); + _set_indentation(startindent); + _maybe_skip_whitespace_tokens(); + goto seqblck_finish; + } } - #endif - if(!m_val_tag.empty()) + else if(first == '!') { - _c4dbgpf("node[{}]: set val tag='{}' -> '{}'", added_id, m_val_tag, normalize_tag(m_val_tag)); - m_tree->set_val_tag(added_id, normalize_tag(m_val_tag)); - m_val_tag = {}; + csubstr tag = _scan_tag(); + _c4dbgpf("seqblck[RVAL]: val tag! [{}]~~~{}~~~", tag.len, tag); + // we need to buffer the tags, as there may be two + // consecutive tags in here + _add_annotation(&m_pending_tags, tag, startindent, startline); } - } - - while(m_stack.size() > 1) - { - _c4dbgpf("popping level: {} (stack sz={})", m_state->level, m_stack.size()); - _RYML_CB_ASSERT(m_stack.m_callbacks, ! 
has_any(SSCL, &m_stack.top())); - if(has_all(RSEQ|FLOW)) - _err("closing ] not found"); - _pop_level(); - } - add_flags(NDOC); -} - -void Parser::_start_new_doc(csubstr rem) -{ - _c4dbgp("_start_new_doc"); - _RYML_CB_ASSERT(m_stack.m_callbacks, rem.begins_with("---")); - C4_UNUSED(rem); - - _end_stream(); - - size_t indref = m_state->indref; - _c4dbgpf("start a document, indentation={}", indref); - _line_progressed(3); - _push_level(); - _start_doc(); - _set_indentation(indref); -} - - -//----------------------------------------------------------------------------- -void Parser::_start_map(bool as_child) -{ - _c4dbgpf("start_map (as child={})", as_child); - addrem_flags(RMAP|RVAL, RKEY|RUNK); - _RYML_CB_ASSERT(m_stack.m_callbacks, node(m_stack.bottom()) == node(m_root_id)); - size_t parent_id = m_stack.size() < 2 ? m_root_id : m_stack.top(1).node_id; - _RYML_CB_ASSERT(m_stack.m_callbacks, parent_id != NONE); - _RYML_CB_ASSERT(m_stack.m_callbacks, node(m_state) == nullptr || node(m_state) == node(m_root_id)); - if(as_child) - { - m_state->node_id = m_tree->append_child(parent_id); - if(has_all(SSCL)) + else if(first == '?') { - type_bits key_quoted = NOTYPE; - if(m_state->flags & QSCL) // before consuming the scalar - key_quoted |= KEYQUO; - csubstr key = _consume_scalar(); - m_tree->to_map(m_state->node_id, key, key_quoted); - _c4dbgpf("start_map: id={} key='{}'", m_state->node_id, m_tree->key(m_state->node_id)); - _write_key_anchor(m_state->node_id); - if( ! 
m_key_tag.empty()) - { - _c4dbgpf("node[{}]: set key tag='{}' -> '{}'", m_state->node_id, m_key_tag, normalize_tag(m_key_tag)); - m_tree->set_key_tag(m_state->node_id, normalize_tag(m_key_tag)); - m_key_tag.clear(); - } + _c4dbgp("seqblck[RVAL]: start child mapblck, explicit key"); + addrem_flags(RNXT, RVAL); + m_was_inside_qmrk = true; + m_evt_handler->begin_map_val_block(); + addrem_flags(RMAP|QMRK, RSEQ|RNXT); + _save_indentation(); + _line_progressed(1); + _maybe_skip_whitespace_tokens(); + goto seqblck_finish; } else { - m_tree->to_map(m_state->node_id); - _c4dbgpf("start_map: id={}", m_state->node_id); + _c4err("parse error"); } - m_tree->_p(m_state->node_id)->m_val.scalar.str = m_state->line_contents.rem.str; - _write_val_anchor(m_state->node_id); } - else + else // RNXT { - _RYML_CB_ASSERT(m_stack.m_callbacks, parent_id != NONE); - m_state->node_id = parent_id; - _c4dbgpf("start_map: id={}", m_state->node_id); - type_bits as_doc = 0; - if(m_tree->is_doc(m_state->node_id)) - as_doc |= DOC; - if(!m_tree->is_map(parent_id)) + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RNXT)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL)); + // + // handle indentation + // + _c4dbgpf("seqblck[RNXT]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation); + if(C4_UNLIKELY(!_at_line_begin())) + _c4err("parse error"); + if(m_evt_handler->m_curr->indentation_ge()) { - RYML_CHECK(!m_tree->has_children(parent_id)); - m_tree->to_map(parent_id, as_doc); + _c4dbgpf("seqblck[RNXT]: skip {} from indref", m_evt_handler->m_curr->indref); + _line_progressed(m_evt_handler->m_curr->indref); + _maybe_skip_whitespace_tokens(); + rem = m_evt_handler->m_curr->line_contents.rem; + if(!rem.len) + goto seqblck_again; } - else + else if(m_evt_handler->m_curr->indentation_lt()) { - m_tree->_add_flags(parent_id, as_doc); + _c4dbgp("seqblck[RNXT]: smaller indentation!"); + 
_handle_indentation_pop_from_block_seq(); + if(has_all(RSEQ|BLCK)) + { + _c4dbgp("seqblck[RNXT]: still seqblck!"); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RNXT)); + _line_progressed(m_evt_handler->m_curr->line_contents.indentation); + rem = m_evt_handler->m_curr->line_contents.rem; + if(!rem.len) + goto seqblck_again; + } + else + { + _c4dbgp("seqblck[RNXT]: no longer seqblck!"); + goto seqblck_finish; + } } - _move_scalar_from_top(); - if(m_key_anchor.not_empty()) - m_key_anchor_was_before = true; - _write_val_anchor(parent_id); - if(m_stack.size() >= 2) + else if(m_evt_handler->m_curr->line_contents.indentation == npos) { - State const& parent_state = m_stack.top(1); - if(parent_state.flags & RSET) - add_flags(RSET); + _c4dbgpf("seqblck[RNXT]: blank line, len={}", m_evt_handler->m_curr->line_contents.rem); + _line_progressed(m_evt_handler->m_curr->line_contents.rem.len); + rem = m_evt_handler->m_curr->line_contents.rem; + if(!rem.len) + goto seqblck_again; } - m_tree->_p(parent_id)->m_val.scalar.str = m_state->line_contents.rem.str; - } - if( ! m_val_tag.empty()) - { - _c4dbgpf("node[{}]: set val tag='{}' -> '{}'", m_state->node_id, m_val_tag, normalize_tag(m_val_tag)); - m_tree->set_val_tag(m_state->node_id, normalize_tag(m_val_tag)); - m_val_tag.clear(); - } -} - -void Parser::_start_map_unk(bool as_child) -{ - if(!m_key_anchor_was_before) - { - _c4dbgpf("stash key anchor before starting map... 
'{}'", m_key_anchor); - csubstr ka = m_key_anchor; - m_key_anchor = {}; - _start_map(as_child); - m_key_anchor = ka; - } - else - { - _start_map(as_child); - m_key_anchor_was_before = false; - } - if(m_key_tag2.not_empty()) - { - m_key_tag = m_key_tag2; - m_key_tag_indentation = m_key_tag2_indentation; - m_key_tag2.clear(); - m_key_tag2_indentation = 0; - } -} - -void Parser::_stop_map() -{ - _c4dbgpf("stop_map[{}]", m_state->node_id); - _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->is_map(m_state->node_id)); - if(has_all(QMRK|RKEY) && !has_all(SSCL)) - { - _c4dbgpf("stop_map[{}]: RKEY", m_state->node_id); - _store_scalar_null(m_state->line_contents.rem.str); - _append_key_val_null(m_state->line_contents.rem.str); - } -} - - -//----------------------------------------------------------------------------- -void Parser::_start_seq(bool as_child) -{ - _c4dbgpf("start_seq (as child={})", as_child); - if(has_all(RTOP|RUNK)) - { - _c4dbgpf("start_seq: moving key tag to val tag: '{}'", m_key_tag); - m_val_tag = m_key_tag; - m_key_tag.clear(); - } - addrem_flags(RSEQ|RVAL, RUNK); - _RYML_CB_ASSERT(m_stack.m_callbacks, node(m_stack.bottom()) == node(m_root_id)); - size_t parent_id = m_stack.size() < 2 ? 
m_root_id : m_stack.top(1).node_id; - _RYML_CB_ASSERT(m_stack.m_callbacks, parent_id != NONE); - _RYML_CB_ASSERT(m_stack.m_callbacks, node(m_state) == nullptr || node(m_state) == node(m_root_id)); - if(as_child) - { - m_state->node_id = m_tree->append_child(parent_id); - if(has_all(SSCL)) + // + // now handle the tokens + // + const char first = rem.str[0]; + _c4dbgpf("seqblck[RNXT]: '{}' node_id={}", first, m_evt_handler->m_curr->node_id); + if(first == '-') { - _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->is_map(parent_id)); - type_bits key_quoted = 0; - if(m_state->flags & QSCL) // before consuming the scalar - key_quoted |= KEYQUO; - csubstr key = _consume_scalar(); - m_tree->to_seq(m_state->node_id, key, key_quoted); - _c4dbgpf("start_seq: id={} name='{}'", m_state->node_id, m_tree->key(m_state->node_id)); - _write_key_anchor(m_state->node_id); - if( ! m_key_tag.empty()) + if(m_evt_handler->m_curr->indref > 0 || m_evt_handler->m_curr->line_contents.indentation > 0 || !_is_doc_begin_token(rem)) + { + _c4dbgp("seqblck[RNXT]: expect next val"); + addrem_flags(RVAL, RNXT); + m_evt_handler->add_sibling(); + _line_progressed(1); + _maybe_skip_whitespace_tokens(); + } + else { - _c4dbgpf("start_seq[{}]: set key tag='{}' -> '{}'", m_state->node_id, m_key_tag, normalize_tag(m_key_tag)); - m_tree->set_key_tag(m_state->node_id, normalize_tag(m_key_tag)); - m_key_tag.clear(); + _c4dbgp("seqblck[RNXT]: start doc"); + _start_doc_suddenly(); + _line_progressed(3); + _maybe_skip_whitespace_tokens(); + goto seqblck_finish; } } - else + else if(first == ':') { - type_bits as_doc = 0; - _RYML_CB_ASSERT(m_stack.m_callbacks, !m_tree->is_doc(m_state->node_id)); - m_tree->to_seq(m_state->node_id, as_doc); - _c4dbgpf("start_seq: id={}{}", m_state->node_id, as_doc ? " as doc" : ""); + // This happens for example in `- [a: b]: c` (after + // terminating the seq, ie, after `]`). All other cases + // (ie colon after scalars) are caught elsewhere (ie, in + // RVAL state). 
+ auto const *C4_RESTRICT prev_state = m_evt_handler->m_parent; + if(C4_LIKELY(prev_state && (prev_state->flags & RMAP))) + { + _c4dbgp("seqblck[RNXT]: actually this seq was '?' key of parent map"); + m_evt_handler->end_seq(); + goto seqblck_finish; + } + else + { + _c4err("parse error"); + } } - _write_val_anchor(m_state->node_id); - m_tree->_p(m_state->node_id)->m_val.scalar.str = m_state->line_contents.rem.str; - } - else - { - m_state->node_id = parent_id; - type_bits as_doc = 0; - if(m_tree->is_doc(m_state->node_id)) - as_doc |= DOC; - if(!m_tree->is_seq(parent_id)) + else if(first == '.') { - RYML_CHECK(!m_tree->has_children(parent_id)); - m_tree->to_seq(parent_id, as_doc); + _c4dbgp("seqblck[RNXT]: maybe doc?"); + csubstr rs = rem.sub(1); + if(rs == ".." || rs.begins_with(".. ")) + { + _c4dbgp("seqblck[RNXT]: end+start doc"); + _end_doc_suddenly(); + _line_progressed(3); + _maybe_skip_whitespace_tokens(); + goto seqblck_finish; + } + else + { + _c4err("parse error"); + } } else { - m_tree->_add_flags(parent_id, as_doc); + // may be an indentless sequence nested in a map... + //if(m_evt_handler->m_stack.size() >= 2) + #ifdef RYML_DBG + char flagbuf_[128]; + for(auto const& s : m_evt_handler->m_stack) + { + _dbg_printf("state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(flagbuf_, s.flags)); + } + #endif + if(m_evt_handler->m_parent && has_all(RMAP|BLCK, m_evt_handler->m_parent) && m_evt_handler->m_curr->indref == m_evt_handler->m_parent->indref) + { + _c4dbgpf("seqblck[RNXT]: end indentless seq, go to parent={}. 
node={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id); + _RYML_CB_ASSERT(this->callbacks(), m_evt_handler->m_curr != m_evt_handler->m_parent); + _handle_indentation_pop(m_evt_handler->m_parent); + _RYML_CB_ASSERT(this->callbacks(), has_all(RMAP|BLCK)); + m_evt_handler->add_sibling(); + addrem_flags(RKEY, RNXT); + goto seqblck_finish; + } + else //if(first != '*') + { + _c4err("parse error"); + } } - _move_scalar_from_top(); - _c4dbgpf("start_seq: id={}{}", m_state->node_id, as_doc ? " as_doc" : ""); - _write_val_anchor(parent_id); - m_tree->_p(parent_id)->m_val.scalar.str = m_state->line_contents.rem.str; - } - if( ! m_val_tag.empty()) - { - _c4dbgpf("start_seq[{}]: set val tag='{}' -> '{}'", m_state->node_id, m_val_tag, normalize_tag(m_val_tag)); - m_tree->set_val_tag(m_state->node_id, normalize_tag(m_val_tag)); - m_val_tag.clear(); - } -} - -void Parser::_stop_seq() -{ - _c4dbgp("stop_seq"); - _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->is_seq(m_state->node_id)); -} - - -//----------------------------------------------------------------------------- -void Parser::_start_seqimap() -{ - _c4dbgpf("start_seqimap at node={}. has_children={}", m_state->node_id, m_tree->has_children(m_state->node_id)); - _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(RSEQ|FLOW)); - // create a map, and turn the last scalar of this sequence - // into the key of the map's first child. This scalar was - // understood to be a value in the sequence, but it is - // actually a key of a map, implicitly opened here. - // Eg [val, key: val] - // - // Yep, YAML is crazy. - if(m_tree->has_children(m_state->node_id) && m_tree->has_val(m_tree->last_child(m_state->node_id))) - { - size_t prev = m_tree->last_child(m_state->node_id); - NodeType ty = m_tree->_p(prev)->m_type; // don't use type() because it masks out the quotes - NodeScalar tmp = m_tree->valsc(prev); - _c4dbgpf("has children and last child={} has val. 
saving the scalars, val='{}' quoted={}", prev, tmp.scalar, ty.is_val_quoted()); - m_tree->remove(prev); - _push_level(); - _start_map(); - _store_scalar(tmp.scalar, ty.is_val_quoted()); - m_key_anchor = tmp.anchor; - m_key_tag = tmp.tag; - } - else - { - _c4dbgpf("node {} has no children yet, using empty key", m_state->node_id); - _push_level(); - _start_map(); - _store_scalar_null(m_state->line_contents.rem.str); } - add_flags(RSEQIMAP|FLOW); -} - -void Parser::_stop_seqimap() -{ - _c4dbgp("stop_seqimap"); - _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(RSEQIMAP)); -} - - -//----------------------------------------------------------------------------- -NodeData* Parser::_append_val(csubstr val, flag_t quoted) -{ - _RYML_CB_ASSERT(m_stack.m_callbacks, ! has_all(SSCL)); - _RYML_CB_ASSERT(m_stack.m_callbacks, node(m_state) != nullptr); - _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->is_seq(m_state->node_id)); - type_bits additional_flags = quoted ? VALQUO : NOTYPE; - _c4dbgpf("append val: '{}' to parent id={} (level={}){}", val, m_state->node_id, m_state->level, quoted ? " VALQUO!" : ""); - size_t nid = m_tree->append_child(m_state->node_id); - m_tree->to_val(nid, val, additional_flags); - _c4dbgpf("append val: id={} val='{}'", nid, m_tree->get(nid)->m_val.scalar); - if( ! 
m_val_tag.empty()) + seqblck_again: + _c4dbgt("seqblck: go again", 0); + if(_finished_line()) { - _c4dbgpf("append val[{}]: set val tag='{}' -> '{}'", nid, m_val_tag, normalize_tag(m_val_tag)); - m_tree->set_val_tag(nid, normalize_tag(m_val_tag)); - m_val_tag.clear(); + _line_ended(); + _scan_line(); + if(_finished_file()) + { + _c4dbgp("seqblck: finish!"); + _end_seq_blck(); + goto seqblck_finish; + } + _c4dbgnextline(); } - _write_val_anchor(nid); - return m_tree->get(nid); -} - -NodeData* Parser::_append_key_val(csubstr val, flag_t val_quoted) -{ - _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->is_map(m_state->node_id)); - type_bits additional_flags = 0; - if(m_state->flags & QSCL) - additional_flags |= KEYQUO; - if(val_quoted) - additional_flags |= VALQUO; + goto seqblck_start; - csubstr key = _consume_scalar(); - _c4dbgpf("append keyval: '{}' '{}' to parent id={} (level={}){}{}", key, val, m_state->node_id, m_state->level, (additional_flags & KEYQUO) ? " KEYQUO!" : "", (additional_flags & VALQUO) ? " VALQUO!" : ""); - size_t nid = m_tree->append_child(m_state->node_id); - m_tree->to_keyval(nid, key, val, additional_flags); - _c4dbgpf("append keyval: id={} key='{}' val='{}'", nid, m_tree->key(nid), m_tree->val(nid)); - if( ! m_key_tag.empty()) - { - _c4dbgpf("append keyval[{}]: set key tag='{}' -> '{}'", nid, m_key_tag, normalize_tag(m_key_tag)); - m_tree->set_key_tag(nid, normalize_tag(m_key_tag)); - m_key_tag.clear(); - } - if( ! 
m_val_tag.empty()) - { - _c4dbgpf("append keyval[{}]: set val tag='{}' -> '{}'", nid, m_val_tag, normalize_tag(m_val_tag)); - m_tree->set_val_tag(nid, normalize_tag(m_val_tag)); - m_val_tag.clear(); - } - _write_key_anchor(nid); - _write_val_anchor(nid); - rem_flags(QMRK); - return m_tree->get(nid); + seqblck_finish: + _c4dbgp("seqblck: finish"); } //----------------------------------------------------------------------------- -void Parser::_store_scalar(csubstr s, flag_t is_quoted) -{ - _c4dbgpf("state[{}]: storing scalar '{}' (flag: {}) (old scalar='{}')", - m_state-m_stack.begin(), s, m_state->flags & SSCL, m_state->scalar); - RYML_CHECK(has_none(SSCL)); - add_flags(SSCL | (is_quoted * QSCL)); - m_state->scalar = s; -} - -csubstr Parser::_consume_scalar() -{ - _c4dbgpf("state[{}]: consuming scalar '{}' (flag: {}))", m_state-m_stack.begin(), m_state->scalar, m_state->flags & SSCL); - RYML_CHECK(m_state->flags & SSCL); - csubstr s = m_state->scalar; - rem_flags(SSCL | QSCL); - m_state->scalar.clear(); - return s; -} - -void Parser::_move_scalar_from_top() -{ - if(m_stack.size() < 2) return; - State &prev = m_stack.top(1); - _RYML_CB_ASSERT(m_stack.m_callbacks, m_state == &m_stack.top()); - _RYML_CB_ASSERT(m_stack.m_callbacks, m_state != &prev); - if(prev.flags & SSCL) - { - _c4dbgpf("moving scalar '{}' from state[{}] to state[{}] (overwriting '{}')", prev.scalar, &prev-m_stack.begin(), m_state-m_stack.begin(), m_state->scalar); - add_flags(prev.flags & (SSCL | QSCL)); - m_state->scalar = prev.scalar; - rem_flags(SSCL | QSCL, &prev); - prev.scalar.clear(); - } -} -//----------------------------------------------------------------------------- -/** @todo this function is a monster and needs love. Likely, it needs - * to be split like _scan_scalar_*() */ -bool Parser::_handle_indentation() +template +void ParseEngine::_handle_map_block() { - _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(FLOW)); - if( ! 
_at_line_begin()) - return false; +mapblck_start: + _c4dbgpf("handle2_map_block: map_id={} node_id={} level={} indref={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref); - size_t ind = m_state->line_contents.indentation; - csubstr rem = m_state->line_contents.rem; - /** @todo instead of trimming, we should use the indentation index from above */ - csubstr remt = rem.triml(' '); + // states: RKEY|QMRK -> RKCL -> RVAL -> RNXT + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RMAP)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(BLCK)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RKCL|RVAL|RNXT|QMRK)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RKEY) + has_any(RKCL) + has_any(RVAL) + has_any(RNXT) + has_any(QMRK))); - if(remt.empty() || remt.begins_with('#')) // this is a blank or comment line - { - _line_progressed(rem.size()); - return true; - } + _maybe_skip_comment(); + csubstr rem = m_evt_handler->m_curr->line_contents.rem; + if(!rem.len) + goto mapblck_again; - _c4dbgpf("indentation? 
ind={} indref={}", ind, m_state->indref); - if(ind == m_state->indref) + if(has_any(RKEY)) { - _c4dbgpf("same indentation: {}", ind); - if(!rem.sub(ind).begins_with('-')) + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT)); + // + // handle indentation + // + if(m_evt_handler->m_curr->at_line_beginning()) { - _c4dbgp("does not begin with -"); - if(has_any(RMAP)) + if(m_evt_handler->m_curr->indentation_eq()) { - if(has_all(SSCL|RVAL)) - { - _c4dbgp("add with null val"); - _append_key_val_null(rem.str + ind - 1); - addrem_flags(RKEY, RVAL); - } + _c4dbgpf("mapblck[RKEY]: skip {} from indref", m_evt_handler->m_curr->indref); + _line_progressed(m_evt_handler->m_curr->indref); + rem = m_evt_handler->m_curr->line_contents.rem; + if(!rem.len) + goto mapblck_again; } - else if(has_any(RSEQ)) + else if(m_evt_handler->m_curr->indentation_lt()) { - if(m_stack.size() > 2) // do not pop to root level + _c4dbgp("mapblck[RKEY]: smaller indentation!"); + _handle_indentation_pop_from_block_map(); + _line_progressed(m_evt_handler->m_curr->line_contents.indentation); + if(has_all(RMAP|BLCK)) { - if(has_any(RNXT)) - { - _c4dbgp("end the indentless seq"); - _pop_level(); - return true; - } - else if(has_any(RVAL)) - { - _c4dbgp("add with null val"); - _append_val_null(rem.str); - _c4dbgp("end the indentless seq"); - _pop_level(); - return true; - } + _c4dbgp("mapblck[RKEY]: still mapblck!"); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY)); + rem = m_evt_handler->m_curr->line_contents.rem; + if(!rem.len) + goto mapblck_again; + } + else + { + _c4dbgp("mapblck[RKEY]: no longer mapblck!"); + goto mapblck_finish; } } + else + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_gt()); + _c4err("invalid 
indentation"); + } } - _line_progressed(ind); - return ind > 0; - } - else if(ind < m_state->indref) - { - _c4dbgpf("smaller indentation ({} < {})!!!", ind, m_state->indref); - if(has_all(RVAL)) + // + // now handle the tokens + // + const char first = rem.str[0]; + const size_t startline = m_evt_handler->m_curr->pos.line; + const size_t startindent = m_evt_handler->m_curr->line_contents.current_col(); + _c4dbgpf("mapblck[RKEY]: '{}'", first); + ScannedScalar sc; + if(first == '\'') + { + _c4dbgp("mapblck[RKEY]: scanning single-quoted scalar"); + sc = _scan_scalar_squot(); + csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc); + _handle_annotations_before_blck_key_scalar(); + m_evt_handler->set_key_scalar_squoted(maybe_filtered); + addrem_flags(RVAL, RKEY); + if(!_maybe_scan_following_colon()) + _c4err("could not find ':' colon after key"); + _maybe_skip_whitespace_tokens(); + } + else if(first == '"') + { + _c4dbgp("mapblck[RKEY]: scanning double-quoted scalar"); + sc = _scan_scalar_dquot(); + csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc); + _handle_annotations_before_blck_key_scalar(); + m_evt_handler->set_key_scalar_dquoted(maybe_filtered); + addrem_flags(RVAL, RKEY); + if(!_maybe_scan_following_colon()) + _c4err("could not find ':' colon after key"); + _maybe_skip_whitespace_tokens(); + } + // block scalars (| and >) can not be used as keys unless they + // appear in an explicit QMRK scope (ie, after the ? 
token), + else if(C4_UNLIKELY(first == '|')) + { + _c4err("block literal keys must be enclosed in '?'"); + } + else if(C4_UNLIKELY(first == '>')) + { + _c4err("block literal keys must be enclosed in '?'"); + } + else if(_scan_scalar_plain_map_blck(&sc)) + { + _c4dbgp("mapblck[RKEY]: plain scalar"); + csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref); + _handle_annotations_before_blck_key_scalar(); + m_evt_handler->set_key_scalar_plain(maybe_filtered); + addrem_flags(RVAL, RKEY); + if(!_maybe_scan_following_colon()) + _c4err("could not find ':' colon after key"); + _maybe_skip_whitespace_tokens(); + } + else if(first == '?') + { + _c4dbgp("mapblck[RKEY]: key token!"); + addrem_flags(QMRK, RKEY); + _line_progressed(1); + _maybe_skip_whitespace_tokens(); + m_was_inside_qmrk = true; + goto mapblck_again; + } + else if(first == ':') + { + _c4dbgp("mapblck[RKEY]: setting empty key"); + _handle_annotations_before_blck_key_scalar(); + m_evt_handler->set_key_scalar_plain({}); + addrem_flags(RVAL, RKEY); + _line_progressed(1); + _maybe_skip_whitespace_tokens(); + } + else if(first == '*') + { + csubstr ref = _scan_ref_map(); + _c4dbgpf("mapblck[RKEY]: key ref! [{}]~~~{}~~~", ref.len, ref); + _handle_annotations_before_blck_key_scalar(); + m_evt_handler->set_key_ref(ref); + addrem_flags(RVAL, RKEY); + if(!_maybe_scan_following_colon()) + _c4err("could not find ':' colon after key"); + _maybe_skip_whitespace_tokens(); + } + else if(first == '&') + { + csubstr anchor = _scan_anchor(); + _c4dbgpf("mapblck[RKEY]: key anchor! [{}]~~~{}~~~", anchor.len, anchor); + _add_annotation(&m_pending_anchors, anchor, startindent, startline); + } + else if(first == '!') + { + csubstr tag = _scan_tag(); + _c4dbgpf("mapblck[RKEY]: key tag! 
[{}]~~~{}~~~", tag.len, tag); + _add_annotation(&m_pending_tags, tag, startindent, startline); + } + else if(first == '[') + { + // RYML's tree cannot store container keys, but that's + // handled inside the tree handler. Other handlers may be + // able to handle it. + _c4dbgp("mapblck[RKEY]: start child seqflow (!)"); + addrem_flags(RKCL, RKEY); + _handle_annotations_before_blck_key_scalar(); + m_evt_handler->begin_seq_key_flow(); + addrem_flags(RSEQ|FLOW|RVAL, RMAP|BLCK|RKCL); + _line_progressed(1); + _set_indentation(startindent); + goto mapblck_finish; + } + else if(first == '{') + { + // RYML's tree cannot store container keys, but that's + // handled inside the tree handler. Other handlers may be + // able to handle it. + _c4dbgp("mapblck[RKEY]: start child mapflow (!)"); + addrem_flags(RKCL, RKEY); + _handle_annotations_before_blck_key_scalar(); + m_evt_handler->begin_map_key_flow(); + addrem_flags(FLOW|RKEY, BLCK|RKCL); + _line_progressed(1); + _set_indentation(startindent); + goto mapblck_finish; + } + else if(first == '-') { - _c4dbgp("there was an empty val -- appending"); - if(has_all(RMAP)) + _c4dbgp("mapblck[RKEY]: maybe doc?"); + if(m_evt_handler->m_curr->line_contents.indentation == 0 && _is_doc_begin_token(rem)) { - _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(SSCL)); - _append_key_val_null(rem.sub(ind).str - 1); + _c4dbgp("mapblck[RKEY]: end+start doc"); + _start_doc_suddenly(); + _line_progressed(3); + _maybe_skip_whitespace_tokens(); + goto mapblck_finish; } - else if(has_all(RSEQ)) + else { - _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(SSCL)); - _append_val_null(rem.sub(ind).str - 1); + _c4err("parse error"); } } - // search the stack frame to jump to based on its indentation - State const* popto = nullptr; - _RYML_CB_ASSERT(m_stack.m_callbacks, m_stack.is_contiguous()); // this search relies on the stack being contiguous - for(State const* s = m_state-1; s >= m_stack.begin(); --s) + else if(first == '.') { - _c4dbgpf("searching for state 
with indentation {}. curr={} (level={},node={})", ind, s->indref, s->level, s->node_id); - if(s->indref == ind) + _c4dbgp("mapblck[RKEY]: maybe end doc?"); + if(m_evt_handler->m_curr->line_contents.indentation == 0 && _is_doc_end_token(rem)) { - _c4dbgpf("gotit!!! level={} node={}", s->level, s->node_id); - popto = s; - // while it may be tempting to think we're done at this - // point, we must still determine whether we're jumping to a - // parent with the same indentation. Consider this case with - // an indentless sequence: - // - // product: - // - sku: BL394D - // quantity: 4 - // description: Basketball - // price: 450.00 - // - sku: BL4438H - // quantity: 1 - // description: Super Hoop - // price: 2392.00 # jumping one level here would be wrong. - // tax: 1234.5 # we must jump two levels - if(popto > m_stack.begin()) - { - auto parent = popto - 1; - if(parent->indref == popto->indref) - { - _c4dbgpf("the parent (level={},node={}) has the same indentation ({}). is this in an indentless sequence?", parent->level, parent->node_id, popto->indref); - _c4dbgpf("isseq(popto)={} ismap(parent)={}", m_tree->is_seq(popto->node_id), m_tree->is_map(parent->node_id)); - if(m_tree->is_seq(popto->node_id) && m_tree->is_map(parent->node_id)) - { - if( ! 
remt.begins_with('-')) - { - _c4dbgp("this is an indentless sequence"); - popto = parent; - } - else - { - _c4dbgp("not an indentless sequence"); - } - } - } - } - break; + _c4dbgp("mapblck[RKEY]: end doc"); + _end_doc_suddenly(); + _line_progressed(3); + _maybe_skip_whitespace_tokens(); + goto mapblck_finish; + } + else + { + _c4err("parse error"); + } + } + _RYML_WITH_TAB_TOKENS( + else if(first == '\t') + { + _c4dbgp("mapblck[RKEY]: skip tabs"); + _maybe_skipchars('\t'); + }) + else + { + _c4err("parse error"); + } + } + else if(has_any(RKCL)) // read the key colon + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK)); + // + // handle indentation + // + if(m_evt_handler->m_curr->at_line_beginning()) + { + if(m_evt_handler->m_curr->indentation_eq()) + { + _c4dbgpf("mapblck[RKCL]: skip {} from indref", m_evt_handler->m_curr->indref); + _line_progressed(m_evt_handler->m_curr->indref); + rem = m_evt_handler->m_curr->line_contents.rem; + if(!rem.len) + goto mapblck_again; + } + else if(C4_UNLIKELY(m_evt_handler->m_curr->indentation_lt())) + { + _c4err("invalid indentation"); } } - if(!popto || popto >= m_state || popto->level >= m_state->level) + const char first = rem.str[0]; + _c4dbgpf("mapblck[RKCL]: '{}'", first); + if(first == ':') { - _c4err("parse error: incorrect indentation?"); + _c4dbgp("mapblck[RKCL]: found the colon"); + addrem_flags(RVAL, RKCL); + _line_progressed(1); + _maybe_skip_whitespace_tokens(); } - _c4dbgpf("popping {} levels: from level {} to level {}", m_state->level-popto->level, m_state->level, popto->level); - while(m_state != popto) + else if(first == '?') { - _c4dbgpf("popping level {} (indentation={})", m_state->level, m_state->indref); - _pop_level(); + _c4dbgp("mapblck[RKCL]: got '?'. 
val was empty"); + _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_was_inside_qmrk); + m_evt_handler->set_val_scalar_plain({}); + m_evt_handler->add_sibling(); + addrem_flags(QMRK, RKCL); + _line_progressed(1); + _maybe_skip_whitespace_tokens(); } - _RYML_CB_ASSERT(m_stack.m_callbacks, ind == m_state->indref); - _line_progressed(ind); - return true; - } - else - { - _c4dbgpf("larger indentation ({} > {})!!!", ind, m_state->indref); - _RYML_CB_ASSERT(m_stack.m_callbacks, ind > m_state->indref); - if(has_all(RMAP|RVAL)) + else if(first == '-') { - if(_is_scalar_next__rmap_val(remt) && remt.first_of(":?") == npos) + if(m_evt_handler->m_curr->indref == 0 || m_evt_handler->m_curr->line_contents.indentation == 0 || _is_doc_begin_token(rem)) { - _c4dbgpf("actually it seems a value: '{}'", remt); + _c4dbgp("mapblck[RKCL]: end+start doc"); + _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, _is_doc_begin_token(rem)); + _start_doc_suddenly(); + _line_progressed(3); + _maybe_skip_whitespace_tokens(); + goto mapblck_finish; } else { - addrem_flags(RKEY, RVAL); - _start_unk(); - //_move_scalar_from_top(); - _line_progressed(ind); - _save_indentation(); - return true; + _c4err("parse error"); + } + } + else if(first == '.') + { + _c4dbgp("mapblck[RKCL]: maybe end doc?"); + csubstr rs = rem.sub(1); + if(rs == ".." || rs.begins_with(".. 
")) + { + _c4dbgp("mapblck[RKCL]: end+start doc"); + _end_doc_suddenly(); + _line_progressed(3); + goto mapblck_finish; + } + else + { + _c4err("parse error"); } } - else if(has_all(RSEQ|RVAL)) + else if(m_was_inside_qmrk) { - // nothing to do here + _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_eq()); + _c4dbgp("mapblck[RKCL]: missing :"); + m_evt_handler->set_val_scalar_plain({}); + m_evt_handler->add_sibling(); + m_was_inside_qmrk = false; + addrem_flags(RKEY, RKCL); } else { - _c4err("parse error - indentation should not increase at this point"); + _c4err("parse error"); } } - - return false; -} - -//----------------------------------------------------------------------------- -csubstr Parser::_scan_comment() -{ - csubstr s = m_state->line_contents.rem; - _RYML_CB_ASSERT(m_stack.m_callbacks, s.begins_with('#')); - _line_progressed(s.len); - // skip the # character - s = s.sub(1); - // skip leading whitespace - s = s.right_of(s.first_not_of(' '), /*include_pos*/true); - _c4dbgpf("comment was '{}'", s); - return s; -} - -//----------------------------------------------------------------------------- -csubstr Parser::_scan_squot_scalar() -{ - // quoted scalars can spread over multiple lines! 
- // nice explanation here: http://yaml-multiline.info/ - - // a span to the end of the file - size_t b = m_state->pos.offset; - substr s = m_buf.sub(b); - if(s.begins_with(' ')) - { - s = s.triml(' '); - _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf.sub(b).is_super(s)); - _RYML_CB_ASSERT(m_stack.m_callbacks, s.begin() >= m_buf.sub(b).begin()); - _line_progressed((size_t)(s.begin() - m_buf.sub(b).begin())); - } - b = m_state->pos.offset; // take this into account - _RYML_CB_ASSERT(m_stack.m_callbacks, s.begins_with('\'')); - - // skip the opening quote - _line_progressed(1); - s = s.sub(1); - - bool needs_filter = false; - - size_t numlines = 1; // we already have one line - size_t pos = npos; // find the pos of the matching quote - while( ! _finished_file()) + else if(has_any(RVAL)) { - const csubstr line = m_state->line_contents.rem; - bool line_is_blank = true; - _c4dbgpf("scanning single quoted scalar @ line[{}]: ~~~{}~~~", m_state->pos.line, line); - for(size_t i = 0; i < line.len; ++i) - { - const char curr = line.str[i]; - if(curr == '\'') // single quotes are escaped with two single quotes + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK)); + // + // handle indentation + // + if(m_evt_handler->m_curr->at_line_beginning()) + { + _c4dbgpf("mapblck[RVAL]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation); + m_evt_handler->m_curr->more_indented = false; + if(m_evt_handler->m_curr->indref == npos) + { + _c4dbgpf("mapblck[RVAL]: setting indentation={}", m_evt_handler->m_parent->indref); + _set_indentation(m_evt_handler->m_curr->line_contents.indentation); + _line_progressed(m_evt_handler->m_curr->indref); + rem = m_evt_handler->m_curr->line_contents.rem; + if(!rem.len) + goto 
mapblck_again; + } + else if(m_evt_handler->m_curr->indentation_eq()) + { + _c4dbgp("mapblck[RVAL]: skip indentation!"); + _line_progressed(m_evt_handler->m_curr->indref); + rem = m_evt_handler->m_curr->line_contents.rem; + if(!rem.len) + goto mapblck_again; + // TODO: this is valid: + // + // ```yaml + // a: + // b: + // --- + // a: + // b + // --- + // a: + // b: c + // ``` + // + // ... but this is not: + // + // ```yaml + // a: + // v + // --- + // a: b: c + // ``` + // + // here, we probably need to set a boolean on the state + // to disambiguate between these cases. + } + else if(m_evt_handler->m_curr->indentation_gt()) { - const char next = i+1 < line.len ? line.str[i+1] : '~'; - if(next != '\'') // so just look for the first quote - { // without another after it - pos = i; - break; + _c4dbgp("mapblck[RVAL]: more indented!"); + m_evt_handler->m_curr->more_indented = true; + _line_progressed(m_evt_handler->m_curr->line_contents.indentation); + rem = m_evt_handler->m_curr->line_contents.rem; + if(!rem.len) + goto mapblck_again; + } + else if(m_evt_handler->m_curr->indentation_lt()) + { + _c4dbgp("mapblck[RVAL]: smaller indentation!"); + _handle_indentation_pop_from_block_map(); + if(has_all(RMAP|BLCK)) + { + _c4dbgp("mapblck[RVAL]: still mapblck!"); + _line_progressed(m_evt_handler->m_curr->line_contents.indentation); + if(has_any(RNXT)) + { + _c4dbgp("mapblck[RVAL]: speculatively expect next keyval"); + m_evt_handler->add_sibling(); + addrem_flags(RKEY, RNXT); + } + goto mapblck_again; } else { - needs_filter = true; // needs filter to remove escaped quotes - ++i; // skip the escaped quote + _c4dbgp("mapblck[RVAL]: no longer mapblck!"); + goto mapblck_finish; } } - else if(curr != ' ') + else if(m_evt_handler->m_curr->line_contents.indentation == npos) { - line_is_blank = false; + _c4dbgp("mapblck[RVAL]: empty line!"); + _line_progressed(m_evt_handler->m_curr->line_contents.rem.len); + goto mapblck_again; } } - - // leading whitespace also needs filtering - 
needs_filter = needs_filter - || (numlines > 1) - || line_is_blank - || (_at_line_begin() && line.begins_with(' ')); - - if(pos == npos) - { - _line_progressed(line.len); - ++numlines; - } - else - { - _RYML_CB_ASSERT(m_stack.m_callbacks, pos >= 0 && pos < m_buf.len); - _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf[m_state->pos.offset + pos] == '\''); - _line_progressed(pos + 1); // progress beyond the quote - pos = m_state->pos.offset - b - 1; // but we stop before it - break; + // + // now handle the tokens + // + const char first = rem.str[0]; + const size_t startline = m_evt_handler->m_curr->pos.line; + const size_t startindent = m_evt_handler->m_curr->line_contents.current_col(); + _c4dbgpf("mapblck[RVAL]: '{}'", first); + ScannedScalar sc; + if(first == '\'') + { + _c4dbgp("mapblck[RVAL]: scanning single-quoted scalar"); + sc = _scan_scalar_squot(); + if(!_maybe_scan_following_colon()) + { + _c4dbgp("mapblck[RVAL]: set as val"); + _handle_annotations_before_blck_val_scalar(); + csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc); // VAL! + m_evt_handler->set_val_scalar_squoted(maybe_filtered); + addrem_flags(RNXT, RVAL); + } + else + { + if(startindent != m_evt_handler->m_curr->indref) + { + _c4dbgp("mapblck[RVAL]: start new block map, set scalar as key"); + _handle_annotations_before_start_mapblck(startline); + addrem_flags(RNXT, RVAL); + m_evt_handler->begin_map_val_block(); + _handle_annotations_and_indentation_after_start_mapblck(startindent, startline); + csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); // KEY! 
+ m_evt_handler->set_key_scalar_squoted(maybe_filtered); + _maybe_skip_whitespace_tokens(); + _set_indentation(m_evt_handler->m_curr->line_contents.indentation); + // keep the child state on RVAL + addrem_flags(RVAL, RNXT); + } + else + { + _c4dbgp("mapblck[RVAL]: prev val empty+this is a key"); + m_evt_handler->set_val_scalar_plain({}); + m_evt_handler->add_sibling(); + csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); // KEY! + m_evt_handler->set_key_scalar_squoted(maybe_filtered); + // keep going on RVAL + _maybe_skip_whitespace_tokens(); + } + } } - - _line_ended(); - _scan_line(); - } - - if(pos == npos) - { - _c4err("reached end of file while looking for closing quote"); - } - else - { - _RYML_CB_ASSERT(m_stack.m_callbacks, pos > 0); - _RYML_CB_ASSERT(m_stack.m_callbacks, s.end() >= m_buf.begin() && s.end() <= m_buf.end()); - _RYML_CB_ASSERT(m_stack.m_callbacks, s.end() == m_buf.end() || *s.end() == '\''); - s = s.sub(0, pos-1); - } - - if(needs_filter) - { - csubstr ret = _filter_squot_scalar(s); - _RYML_CB_ASSERT(m_stack.m_callbacks, ret.len <= s.len || s.empty() || s.trim(' ').empty()); - _c4dbgpf("final scalar: \"{}\"", ret); - return ret; - } - - _c4dbgpf("final scalar: \"{}\"", s); - - return s; -} - -//----------------------------------------------------------------------------- -csubstr Parser::_scan_dquot_scalar() -{ - // quoted scalars can spread over multiple lines! 
- // nice explanation here: http://yaml-multiline.info/ - - // a span to the end of the file - size_t b = m_state->pos.offset; - substr s = m_buf.sub(b); - if(s.begins_with(' ')) - { - s = s.triml(' '); - _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf.sub(b).is_super(s)); - _RYML_CB_ASSERT(m_stack.m_callbacks, s.begin() >= m_buf.sub(b).begin()); - _line_progressed((size_t)(s.begin() - m_buf.sub(b).begin())); - } - b = m_state->pos.offset; // take this into account - _RYML_CB_ASSERT(m_stack.m_callbacks, s.begins_with('"')); - - // skip the opening quote - _line_progressed(1); - s = s.sub(1); - - bool needs_filter = false; - - size_t numlines = 1; // we already have one line - size_t pos = npos; // find the pos of the matching quote - while( ! _finished_file()) - { - const csubstr line = m_state->line_contents.rem; - bool line_is_blank = true; - _c4dbgpf("scanning double quoted scalar @ line[{}]: line='{}'", m_state->pos.line, line); - for(size_t i = 0; i < line.len; ++i) + else if(first == '"') { - const char curr = line.str[i]; - if(curr != ' ') - line_is_blank = false; - // every \ is an escape - if(curr == '\\') + _c4dbgp("mapblck[RVAL]: scanning double-quoted scalar"); + sc = _scan_scalar_dquot(); + if(!_maybe_scan_following_colon()) { - const char next = i+1 < line.len ? line.str[i+1] : '~'; - needs_filter = true; - if(next == '"' || next == '\\') - ++i; + _c4dbgp("mapblck[RVAL]: set as val"); + _handle_annotations_before_blck_val_scalar(); + csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc); // VAL! 
+ m_evt_handler->set_val_scalar_dquoted(maybe_filtered); + addrem_flags(RNXT, RVAL); } - else if(curr == '"') + else { - pos = i; - break; + if(startindent != m_evt_handler->m_curr->indref) + { + _c4dbgp("mapblck[RVAL]: start new block map, set scalar as key"); + _handle_annotations_before_start_mapblck(startline); + addrem_flags(RNXT, RVAL); + m_evt_handler->begin_map_val_block(); + _handle_annotations_and_indentation_after_start_mapblck(startindent, startline); + csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); // KEY! + m_evt_handler->set_key_scalar_dquoted(maybe_filtered); + _maybe_skip_whitespace_tokens(); + _set_indentation(m_evt_handler->m_curr->line_contents.indentation); + // keep the child state on RVAL + addrem_flags(RVAL, RNXT); + } + else + { + _c4dbgp("mapblck[RVAL]: prev val empty+this is a key"); + m_evt_handler->set_val_scalar_plain({}); + m_evt_handler->add_sibling(); + csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); // KEY! + m_evt_handler->set_key_scalar_dquoted(maybe_filtered); + // keep going on RVAL + _maybe_skip_whitespace_tokens(); + } } } - - // leading whitespace also needs filtering - needs_filter = needs_filter - || (numlines > 1) - || line_is_blank - || (_at_line_begin() && line.begins_with(' ')); - - if(pos == npos) + // block scalars can only appear as keys when in QMRK scope + // (ie, after ? 
tokens), so no need to scan following colon + else if(first == '|') { - _line_progressed(line.len); - ++numlines; + _c4dbgp("mapblck[RVAL]: scanning block-literal scalar"); + ScannedBlock sb; + _scan_block(&sb, m_evt_handler->m_curr->indref + 1); + _handle_annotations_before_blck_val_scalar(); + csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb); + m_evt_handler->set_val_scalar_literal(maybe_filtered); + addrem_flags(RNXT, RVAL); } - else + else if(first == '>') { - _RYML_CB_ASSERT(m_stack.m_callbacks, pos >= 0 && pos < m_buf.len); - _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf[m_state->pos.offset + pos] == '"'); - _line_progressed(pos + 1); // progress beyond the quote - pos = m_state->pos.offset - b - 1; // but we stop before it - break; + _c4dbgp("mapblck[RVAL]: scanning block-folded scalar"); + ScannedBlock sb; + _scan_block(&sb, m_evt_handler->m_curr->indref + 1); + _handle_annotations_before_blck_val_scalar(); + csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb); + m_evt_handler->set_val_scalar_folded(maybe_filtered); + addrem_flags(RNXT, RVAL); } - - _line_ended(); - _scan_line(); - } - - if(pos == npos) - { - _c4err("reached end of file looking for closing quote"); - } - else - { - _RYML_CB_ASSERT(m_stack.m_callbacks, pos > 0); - _RYML_CB_ASSERT(m_stack.m_callbacks, s.end() == m_buf.end() || *s.end() == '"'); - _RYML_CB_ASSERT(m_stack.m_callbacks, s.end() >= m_buf.begin() && s.end() <= m_buf.end()); - s = s.sub(0, pos-1); - } - - if(needs_filter) - { - csubstr ret = _filter_dquot_scalar(s); - _c4dbgpf("final scalar: [{}]\"{}\"", ret.len, ret); - _RYML_CB_ASSERT(m_stack.m_callbacks, ret.len <= s.len || s.empty() || s.trim(' ').empty()); - return ret; - } - - _c4dbgpf("final scalar: \"{}\"", s); - - return s; -} - -//----------------------------------------------------------------------------- -csubstr Parser::_scan_block() -{ - // nice explanation here: http://yaml-multiline.info/ - csubstr s = m_state->line_contents.rem; - csubstr trimmed 
= s.triml(' '); - if(trimmed.str > s.str) - { - _c4dbgp("skipping whitespace"); - _RYML_CB_ASSERT(m_stack.m_callbacks, trimmed.str >= s.str); - _line_progressed(static_cast(trimmed.str - s.str)); - s = trimmed; - } - _RYML_CB_ASSERT(m_stack.m_callbacks, s.begins_with('|') || s.begins_with('>')); - - _c4dbgpf("scanning block: specs=\"{}\"", s); - - // parse the spec - BlockStyle_e newline = s.begins_with('>') ? BLOCK_FOLD : BLOCK_LITERAL; - BlockChomp_e chomp = CHOMP_CLIP; // default to clip unless + or - are used - size_t indentation = npos; // have to find out if no spec is given - csubstr digits; - if(s.len > 1) - { - _RYML_CB_ASSERT(m_stack.m_callbacks, s.begins_with_any("|>")); - csubstr t = s.sub(1); - _c4dbgpf("scanning block: spec is multichar: '{}'", t); - _RYML_CB_ASSERT(m_stack.m_callbacks, t.len >= 1); - size_t pos = t.first_of("-+"); - _c4dbgpf("scanning block: spec chomp char at {}", pos); - if(pos != npos) + else if(_scan_scalar_plain_map_blck(&sc)) { - if(t[pos] == '-') - chomp = CHOMP_STRIP; - else if(t[pos] == '+') - chomp = CHOMP_KEEP; - if(pos == 0) - t = t.sub(1); + _c4dbgp("mapblck[RVAL]: plain scalar."); + if(!_maybe_scan_following_colon()) + { + _c4dbgp("mapblck[RVAL]: set as val"); + _handle_annotations_before_blck_val_scalar(); + csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref); // VAL! + m_evt_handler->set_val_scalar_plain(maybe_filtered); + addrem_flags(RNXT, RVAL); + } + else + { + if(startindent != m_evt_handler->m_curr->indref) + { + _c4dbgpf("mapblck[RVAL]: start new block map, set scalar as key {}", m_evt_handler->m_curr->indref); + addrem_flags(RNXT, RVAL); + _handle_annotations_before_start_mapblck(startline); + m_evt_handler->begin_map_val_block(); + _handle_annotations_and_indentation_after_start_mapblck(startindent, startline); + csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY! 
+ m_evt_handler->set_key_scalar_plain(maybe_filtered); + _maybe_skip_whitespace_tokens(); + _set_indentation(m_evt_handler->m_curr->line_contents.indentation); + // keep the child state on RVAL + addrem_flags(RVAL, RNXT); + } + else + { + _c4dbgp("mapblck[RVAL]: prev val empty+this is a key"); + _handle_annotations_before_blck_val_scalar(); + m_evt_handler->set_val_scalar_plain({}); + m_evt_handler->add_sibling(); + csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY! + m_evt_handler->set_key_scalar_plain(maybe_filtered); + // keep going on RVAL + _maybe_skip_whitespace_tokens(); + } + } + } + else if(first == '-') + { + if(rem.len == 1 || rem.str[1] == ' ' _RYML_WITH_TAB_TOKENS(|| rem.str[1] == '\t')) + { + _c4dbgp("mapblck[RVAL]: start val seqblck"); + addrem_flags(RNXT, RVAL); + _handle_annotations_before_blck_val_scalar(); + m_evt_handler->begin_seq_val_block(); + addrem_flags(RSEQ|RVAL, RMAP|RNXT); + _set_indentation(startindent); + _line_progressed(1); + _maybe_skip_whitespace_tokens(); + goto mapblck_finish; + } + else if(m_evt_handler->m_curr->indref == 0 || m_evt_handler->m_curr->line_contents.indentation == 0 || _is_doc_begin_token(rem)) + { + _c4dbgp("mapblck[RVAL]: end+start doc"); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, _is_doc_begin_token(rem)); + _start_doc_suddenly(); + _line_progressed(3); + _maybe_skip_whitespace_tokens(); + goto mapblck_finish; + } else - t = t.first(pos); + { + _c4err("parse error"); + } } - // from here to the end, only digits are considered - digits = t.left_of(t.first_not_of("0123456789")); - if( ! digits.empty()) + else if(first == '[') { - if( ! c4::atou(digits, &indentation)) - _c4err("parse error: could not read decimal"); - _c4dbgpf("scanning block: indentation specified: {}. 
add {} from curr state -> {}", indentation, m_state->indref, indentation+m_state->indref); - indentation += m_state->indref; + _c4dbgp("mapblck[RVAL]: start val seqflow"); + addrem_flags(RNXT, RVAL); + _handle_annotations_before_blck_val_scalar(); + m_evt_handler->begin_seq_val_flow(); + addrem_flags(RSEQ|FLOW|RVAL, RMAP|BLCK|RNXT|BLCK); + _set_indentation(m_evt_handler->m_curr->indref + 1u); + _line_progressed(1); + goto mapblck_finish; } - } - - // finish the current line - _line_progressed(s.len); - _line_ended(); - _scan_line(); - - _c4dbgpf("scanning block: style={} chomp={} indentation={}", newline==BLOCK_FOLD ? "fold" : "literal", chomp==CHOMP_CLIP ? "clip" : (chomp==CHOMP_STRIP ? "strip" : "keep"), indentation); - - // start with a zero-length block, already pointing at the right place - substr raw_block(m_buf.data() + m_state->pos.offset, size_t(0));// m_state->line_contents.full.sub(0, 0); - _RYML_CB_ASSERT(m_stack.m_callbacks, raw_block.begin() == m_state->line_contents.full.begin()); - - // read every full line into a raw block, - // from which newlines are to be stripped as needed. - // - // If no explicit indentation was given, pick it from the first - // non-empty line. See - // https://yaml.org/spec/1.2.2/#8111-block-indentation-indicator - size_t num_lines = 0, first = m_state->pos.line, provisional_indentation = npos; - LineContents lc; - while(( ! _finished_file())) - { - // peek next line, but do not advance immediately - lc.reset_with_next_line(m_buf, m_state->pos.offset); - _c4dbgpf("scanning block: peeking at '{}'", lc.stripped); - // evaluate termination conditions - if(indentation != npos) + else if(first == '{') { - // stop when the line is deindented and not empty - if(lc.indentation < indentation && ( ! 
lc.rem.trim(" \t\r\n").empty())) + _c4dbgp("mapblck[RVAL]: start val mapflow"); + addrem_flags(RNXT, RVAL); + _handle_annotations_before_blck_val_scalar(); + m_evt_handler->begin_map_val_flow(); + addrem_flags(RKEY|FLOW, BLCK|RVAL|RNXT); + m_evt_handler->m_curr->scalar_col = m_evt_handler->m_curr->line_contents.indentation; + _set_indentation(m_evt_handler->m_curr->indref + 1u); + _line_progressed(1); + goto mapblck_finish; + } + else if(first == '*') + { + csubstr ref = _scan_ref_map(); + _c4dbgpf("mapblck[RVAL]: ref! [{}]~~~{}~~~", ref.len, ref); + if(startindent == m_evt_handler->m_curr->indref) { - _c4dbgpf("scanning block: indentation decreased ref={} thisline={}", indentation, lc.indentation); - break; + _c4dbgpf("mapblck[RVAL]: same indentation {}", startindent); + m_evt_handler->set_val_ref(ref); + addrem_flags(RNXT, RVAL); } - else if(indentation == 0) + else { - if((lc.rem == "..." || lc.rem.begins_with("... ")) - || - (lc.rem == "---" || lc.rem.begins_with("--- "))) + _c4dbgpf("mapblck[RVAL]: larger indentation {}>{}", startindent, m_evt_handler->m_curr->indref); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, startindent > m_evt_handler->m_curr->indref); + if(_maybe_scan_following_colon()) { - _c4dbgp("scanning block: stop. indentation=0 and stream ended"); - break; + _c4dbgp("mapblck[RVAL]: start child map, block"); + addrem_flags(RNXT, RVAL); + _handle_annotations_before_blck_val_scalar(); + m_evt_handler->begin_map_val_block(); + m_evt_handler->set_key_ref(ref); + _set_indentation(startindent); + // keep going in RVAL + addrem_flags(RVAL, RNXT); + } + else + { + _c4dbgp("mapblck[RVAL]: was val ref"); + _handle_annotations_before_blck_val_scalar(); + m_evt_handler->set_val_ref(ref); + addrem_flags(RNXT, RVAL); } } + _maybe_skip_whitespace_tokens(); + } + else if(first == '&') + { + csubstr anchor = _scan_anchor(); + _c4dbgpf("mapblck[RVAL]: anchor! 
[{}]~~~{}~~~", anchor.len, anchor); + if(startindent == m_evt_handler->m_curr->indref) + { + _c4dbgp("mapblck[RVAL]: anchor for next key. val is missing!"); + m_evt_handler->set_val_scalar_plain({}); + m_evt_handler->add_sibling(); + addrem_flags(RKEY, RVAL); + } + // we need to buffer the anchors, as there may be two + // consecutive anchors in here + _add_annotation(&m_pending_anchors, anchor, startindent, startline); + } + else if(first == '!') + { + csubstr tag = _scan_tag(); + _c4dbgpf("mapblck[RVAL]: tag! [{}]~~~{}~~~", tag.len, tag); + if(startindent == m_evt_handler->m_curr->indref) + { + _c4dbgp("mapblck[RVAL]: tag for next key. val is missing!"); + _handle_annotations_before_blck_val_scalar(); + m_evt_handler->set_val_scalar_plain({}); + m_evt_handler->add_sibling(); + addrem_flags(RKEY, RVAL); + } + // we need to buffer the tags, as there may be two + // consecutive tags in here + _add_annotation(&m_pending_tags, tag, startindent, startline); } + else if(first == '?') + { + if(startindent == m_evt_handler->m_curr->indref) + { + _c4dbgp("mapblck[RVAL]: got '?'. val was empty"); + _handle_annotations_before_blck_val_scalar(); + m_evt_handler->set_val_scalar_plain({}); + m_evt_handler->add_sibling(); + addrem_flags(QMRK, RVAL); + } + else if(startindent > m_evt_handler->m_curr->indref) + { + _c4dbgp("mapblck[RVAL]: start val mapblck"); + addrem_flags(RNXT, RVAL); + _handle_annotations_before_blck_val_scalar(); + m_evt_handler->begin_map_val_block(); + addrem_flags(QMRK|BLCK, RNXT); + _set_indentation(startindent); + } + else + { + _c4err("parse error"); + } + m_was_inside_qmrk = true; + _line_progressed(1); + _maybe_skip_whitespace_tokens(); + goto mapblck_again; + } + else if(first == ':') + { + if(startindent == m_evt_handler->m_curr->indref) + { + _c4dbgp("mapblck[RVAL]: got ':'. 
val was empty, next key as well"); + m_evt_handler->set_val_scalar_plain({}); + m_evt_handler->add_sibling(); + m_evt_handler->set_key_scalar_plain({}); + _line_progressed(1); + _maybe_skip_whitespace_tokens(); + goto mapblck_again; + } + else + { + _c4err("parse error"); + } + } + else if(first == '.') + { + _c4dbgp("mapblck[RVAL]: maybe doc?"); + csubstr rs = rem.sub(1); + if(rs == ".." || rs.begins_with(".. ")) + { + _c4dbgp("seqblck[RVAL]: end doc expl"); + _end_doc_suddenly(); + _line_progressed(3); + _maybe_skip_whitespace_tokens(); + goto mapblck_finish; + } + else + { + _c4err("parse error"); + } + } + _RYML_WITH_TAB_TOKENS( + else if(first == '\t') + { + _c4dbgp("mapblck[RVAL]: skip tabs"); + _maybe_skipchars('\t'); + }) else { - _c4dbgpf("scanning block: indentation ref not set. firstnonws={}", lc.stripped.first_not_of(' ')); - if(lc.stripped.first_not_of(' ') != npos) // non-empty line + _c4err("parse error"); + } + } + else if(has_any(RNXT)) + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK)); + // + // handle indentation + // + if(m_evt_handler->m_curr->at_line_beginning()) + { + _c4dbgpf("mapblck[RNXT]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation); + if(m_evt_handler->m_curr->indentation_eq()) { - _c4dbgpf("scanning block: line not empty. 
indref={} indprov={} indentation={}", m_state->indref, provisional_indentation, lc.indentation); - if(provisional_indentation == npos) + _c4dbgpf("mapblck[RNXT]: skip {} from indref", m_evt_handler->m_curr->indref); + _line_progressed(m_evt_handler->m_curr->indref); + _c4dbgp("mapblck[RNXT]: speculatively expect next keyval"); + m_evt_handler->add_sibling(); + addrem_flags(RKEY, RNXT); + goto mapblck_again; + } + else if(m_evt_handler->m_curr->indentation_lt()) + { + _c4dbgp("mapblck[RNXT]: smaller indentation!"); + _handle_indentation_pop_from_block_map(); + if(has_all(RMAP|BLCK)) { - if(lc.indentation < m_state->indref) - { - _c4dbgpf("scanning block: block terminated indentation={} < indref={}", lc.indentation, m_state->indref); - if(raw_block.len == 0) - { - _c4dbgp("scanning block: was empty, undo next line"); - _line_ended_undo(); - } - break; - } - else if(lc.indentation == m_state->indref) + _line_progressed(m_evt_handler->m_curr->line_contents.indentation); + if(!has_any(RKCL)) { - if(has_any(RSEQ|RMAP)) - { - _c4dbgpf("scanning block: block terminated. reading container and indentation={}==indref={}", lc.indentation, m_state->indref); - break; - } + _c4dbgp("mapblck[RNXT]: speculatively expect next keyval"); + m_evt_handler->add_sibling(); + addrem_flags(RKEY, RNXT); } - _c4dbgpf("scanning block: set indentation ref from this line: ref={}", lc.indentation); - indentation = lc.indentation; + goto mapblck_again; } else { - if(lc.indentation >= provisional_indentation) - { - _c4dbgpf("scanning block: set indentation ref from provisional indentation: provisional_ref={}, thisline={}", provisional_indentation, lc.indentation); - //indentation = provisional_indentation ? 
provisional_indentation : lc.indentation; - indentation = lc.indentation; - } - else - { - break; - //_c4err("parse error: first non-empty block line should have at least the original indentation"); - } + goto mapblck_finish; } } - else // empty line + } + // + // handle tokens + // + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.len > 0); + const char first = rem.str[0]; + _c4dbgpf("mapblck[RNXT]: '{}'", _c4prc(first)); + if(first == ':') + { + if(m_evt_handler->m_curr->more_indented) { - _c4dbgpf("scanning block: line empty or {} spaces. line_indentation={} prov_indentation={}", lc.stripped.len, lc.indentation, provisional_indentation); - if(provisional_indentation != npos) + _c4dbgp("mapblck[RNXT]: start child block map"); + C4_NOT_IMPLEMENTED(); + //m_evt_handler->actually_as_block_map(); + _line_progressed(1); + _set_indentation(m_evt_handler->m_curr->scalar_col); + m_evt_handler->m_curr->more_indented = false; + goto mapblck_again; + } + else + { + _c4err("parse error"); + } + } + else if(first == ' ') + { + _c4dbgp("mapblck[RNXT]: skip spaces"); + _maybe_skip_whitespace_tokens(); + } + else + { + _c4err("parse error"); + } + } + else if(has_any(QMRK)) + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT)); + // + // handle indentation + // + if(m_evt_handler->m_curr->at_line_beginning()) + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.indentation != npos); + if(m_evt_handler->m_curr->indentation_eq()) + { + _c4dbgpf("mapblck[QMRK]: skip {} from indref", m_evt_handler->m_curr->indref); + _line_progressed(m_evt_handler->m_curr->indref); + rem = m_evt_handler->m_curr->line_contents.rem; + if(!rem.len) + goto mapblck_again; + } + else if(m_evt_handler->m_curr->indentation_lt()) 
+ { + _c4dbgp("mapblck[QMRK]: smaller indentation!"); + _handle_indentation_pop_from_block_map(); + _line_progressed(m_evt_handler->m_curr->line_contents.indentation); + if(has_all(RMAP|BLCK)) { - if(lc.stripped.len >= provisional_indentation) - { - _c4dbgpf("scanning block: increase provisional_ref {} -> {}", provisional_indentation, lc.stripped.len); - provisional_indentation = lc.stripped.len; - } - #ifdef RYML_NO_COVERAGE__TO_BE_DELETED - else if(lc.indentation >= provisional_indentation && lc.indentation != npos) - { - _c4dbgpf("scanning block: increase provisional_ref {} -> {}", provisional_indentation, lc.indentation); - provisional_indentation = lc.indentation; - } - #endif + _c4dbgp("mapblck[QMRK]: still mapblck!"); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(QMRK)); + rem = m_evt_handler->m_curr->line_contents.rem; + if(!rem.len) + goto mapblck_again; } else { - provisional_indentation = lc.indentation ? lc.indentation : has_any(RSEQ|RVAL); - _c4dbgpf("scanning block: initialize provisional_ref={}", provisional_indentation); - if(provisional_indentation == npos) - { - provisional_indentation = lc.stripped.len ? 
lc.stripped.len : has_any(RSEQ|RVAL); - _c4dbgpf("scanning block: initialize provisional_ref={}", provisional_indentation); - } + _c4dbgp("mapblck[QMRK]: no longer mapblck!"); + goto mapblck_finish; } } + // indentation can be larger in QMRK state + else + { + _c4dbgp("mapblck[QMRK]: larger indentation !"); + _line_progressed(m_evt_handler->m_curr->line_contents.indentation); + rem = m_evt_handler->m_curr->line_contents.rem; + if(!rem.len) + goto mapblck_again; + } + } + // + // now handle the tokens + // + const char first = rem.str[0]; + const size_t startline = m_evt_handler->m_curr->pos.line; + const size_t startindent = m_evt_handler->m_curr->line_contents.current_col(); + _c4dbgpf("mapblck[QMRK]: '{}'", first); + ScannedScalar sc; + if(first == '\'') + { + _c4dbgp("mapblck[QMRK]: scanning single-quoted scalar"); + sc = _scan_scalar_squot(); + csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); // KEY! + if(!_maybe_scan_following_colon()) + { + _c4dbgp("mapblck[QMRK]: set as key"); + _handle_annotations_before_blck_key_scalar(); + m_evt_handler->set_key_scalar_squoted(maybe_filtered); + addrem_flags(RKCL, QMRK); + } + else + { + _c4dbgp("mapblck[QMRK]: start new block map as key (!), set scalar as key"); + addrem_flags(RKCL, QMRK); + _handle_annotations_before_start_mapblck_as_key(); + m_evt_handler->begin_map_key_block(); + _handle_annotations_and_indentation_after_start_mapblck(startindent, startline); + m_evt_handler->set_key_scalar_squoted(maybe_filtered); + _maybe_skip_whitespace_tokens(); + _set_indentation(startindent); + // keep the child state on RVAL + addrem_flags(RVAL, RKCL|QMRK); + } + } + else if(first == '"') + { + _c4dbgp("mapblck[QMRK]: scanning double-quoted scalar"); + sc = _scan_scalar_dquot(); + csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); // KEY! 
+ if(!_maybe_scan_following_colon()) + { + _c4dbgp("mapblck[QMRK]: set as key"); + _handle_annotations_before_blck_key_scalar(); + m_evt_handler->set_key_scalar_dquoted(maybe_filtered); + addrem_flags(RKCL, QMRK); + } + else + { + _c4dbgp("mapblck[QMRK]: start new block map as key (!), set scalar as key"); + addrem_flags(RKCL, QMRK); + _handle_annotations_before_start_mapblck_as_key(); + m_evt_handler->begin_map_key_block(); + _handle_annotations_and_indentation_after_start_mapblck(startindent, startline); + m_evt_handler->set_key_scalar_dquoted(maybe_filtered); + _maybe_skip_whitespace_tokens(); + _set_indentation(startindent); + // keep the child state on RVAL + addrem_flags(RVAL, RKCL|QMRK); + } + } + else if(first == '|') + { + _c4dbgp("mapblck[QMRK]: scanning block-literal scalar"); + ScannedBlock sb; + _scan_block(&sb, m_evt_handler->m_curr->indref + 1); + csubstr maybe_filtered = _maybe_filter_key_scalar_literal(sb); // KEY! + _handle_annotations_before_blck_key_scalar(); + m_evt_handler->set_key_scalar_literal(maybe_filtered); + addrem_flags(RKCL, QMRK); + } + else if(first == '>') + { + _c4dbgp("mapblck[QMRK]: scanning block-literal scalar"); + ScannedBlock sb; + _scan_block(&sb, m_evt_handler->m_curr->indref + 1); + csubstr maybe_filtered = _maybe_filter_key_scalar_folded(sb); // KEY! + _handle_annotations_before_blck_key_scalar(); + m_evt_handler->set_key_scalar_folded(maybe_filtered); + addrem_flags(RKCL, QMRK); + } + else if(_scan_scalar_plain_map_blck(&sc)) + { + _c4dbgp("mapblck[QMRK]: plain scalar"); + csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY! 
+ if(!_maybe_scan_following_colon()) + { + _c4dbgp("mapblck[QMRK]: set as key"); + _handle_annotations_before_blck_key_scalar(); + m_evt_handler->set_key_scalar_plain(maybe_filtered); + addrem_flags(RKCL, QMRK); + } + else + { + _c4dbgp("mapblck[QMRK]: start new block map as key (!), set scalar as key"); + addrem_flags(RKCL, QMRK); + _handle_annotations_before_start_mapblck_as_key(); + m_evt_handler->begin_map_key_block(); + _handle_annotations_and_indentation_after_start_mapblck(startindent, startline); + m_evt_handler->set_key_scalar_plain(maybe_filtered); + _maybe_skip_whitespace_tokens(); + _set_indentation(startindent); + // keep the child state on RVAL + addrem_flags(RVAL, RKCL|QMRK); + } + } + else if(first == ':') + { + if(startindent == m_evt_handler->m_curr->indref) + { + _c4dbgp("mapblck[QMRK]: empty key"); + addrem_flags(RVAL, QMRK); + _handle_annotations_before_blck_key_scalar(); + m_evt_handler->set_key_scalar_plain({}); + _line_progressed(1); + _maybe_skip_whitespace_tokens(); + } + else + { + _c4dbgp("mapblck[QMRK]: start new block map as key (!), empty key"); + addrem_flags(RKCL, QMRK); + _handle_annotations_before_start_mapblck_as_key(); + m_evt_handler->begin_map_key_block(); + _handle_annotations_and_indentation_after_start_mapblck(startindent, startline); + m_evt_handler->set_key_scalar_plain({}); + _line_progressed(1); + _maybe_skip_whitespace_tokens(); + _set_indentation(startindent); + // keep the child state on RVAL + addrem_flags(RVAL, RKCL|QMRK); + } + } + else if(first == '*') + { + csubstr ref = _scan_ref_map(); + _c4dbgpf("mapblck[QMRK]: key ref! 
[{}]~~~{}~~~", ref.len, ref); + if(!_maybe_scan_following_colon()) + { + _c4dbgp("mapblck[QMRK]: set ref as key"); + _handle_annotations_before_blck_key_scalar(); + m_evt_handler->set_key_ref(ref); + addrem_flags(RKCL, QMRK); + } + else + { + _c4dbgp("mapblck[QMRK]: start new block map as key (!), set ref as key"); + addrem_flags(RKCL, QMRK); + _handle_annotations_before_blck_key_scalar(); + m_evt_handler->begin_map_key_block(); + m_evt_handler->set_key_ref(ref); + _set_indentation(startindent); + // keep the child state on RVAL + addrem_flags(RVAL, RKCL|QMRK); + } + _maybe_skip_whitespace_tokens(); + } + else if(first == '&') + { + csubstr anchor = _scan_anchor(); + _c4dbgpf("mapblck[QMRK]: key anchor! [{}]~~~{}~~~", anchor.len, anchor); + _add_annotation(&m_pending_anchors, anchor, startindent, startline); + } + else if(first == '!') + { + csubstr tag = _scan_tag(); + _c4dbgpf("mapblck[QMRK]: key tag! [{}]~~~{}~~~", tag.len, tag); + _add_annotation(&m_pending_tags, tag, startindent, startline); + } + else if(first == '-') + { + _c4dbgp("mapblck[QMRK]: maybe doc?"); + csubstr rs = rem.sub(1); + if(rs == "--" || rs.begins_with("-- ")) + { + _c4dbgp("mapblck[QMRK]: end+start doc"); + _start_doc_suddenly(); + _line_progressed(3); + } + else + { + _c4dbgp("mapblck[QMRK]: start child seqblck (!)"); + addrem_flags(RKCL, RKEY|QMRK); + m_evt_handler->begin_seq_key_block(); + addrem_flags(RVAL|RSEQ, RMAP|RKCL|QMRK); + _set_indentation(startindent); + _line_progressed(1); + } + _maybe_skip_whitespace_tokens(); + goto mapblck_finish; + } + else if(first == '[') + { + _c4dbgp("mapblck[QMRK]: start child seqflow (!)"); + addrem_flags(RKCL, RKEY|QMRK); + m_evt_handler->begin_seq_key_flow(); + addrem_flags(RVAL|RSEQ|FLOW, RMAP|RKCL|QMRK|BLCK); + _set_indentation(m_evt_handler->m_parent->indref); + _line_progressed(1); + goto mapblck_finish; + } + else if(first == '{') + { + _c4dbgp("mapblck[QMRK]: start child mapblck (!)"); + addrem_flags(RKCL, RKEY|QMRK); + 
m_evt_handler->begin_map_key_flow(); + addrem_flags(RKEY|FLOW, RVAL|RKCL|QMRK|BLCK); + _set_indentation(m_evt_handler->m_parent->indref); + _line_progressed(1); + goto mapblck_finish; + } + else if(first == '?') + { + _c4dbgp("mapblck[QMRK]: another QMRK '?'"); + m_evt_handler->set_key_scalar_plain({}); + m_evt_handler->set_val_scalar_plain({}); + m_evt_handler->add_sibling(); + _line_progressed(1); + } + else if(first == '.') + { + _c4dbgp("mapblck[QMRK]: maybe end doc?"); + csubstr rs = rem.sub(1); + if(rs == ".." || rs.begins_with(".. ")) + { + _c4dbgp("mapblck[QMRK]: end+start doc"); + _end_doc_suddenly(); + _line_progressed(3); + goto mapblck_finish; + } + else + { + _c4err("parse error"); + } + } + else + { + _c4err("parse error"); } - // advance now that we know the folded scalar continues - m_state->line_contents = lc; - _c4dbgpf("scanning block: append '{}'", m_state->line_contents.rem); - raw_block.len += m_state->line_contents.full.len; - _line_progressed(m_state->line_contents.rem.len); - _line_ended(); - ++num_lines; } - _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.line == (first + num_lines) || (raw_block.len == 0)); - C4_UNUSED(num_lines); - C4_UNUSED(first); - if(indentation == npos) + mapblck_again: + _c4dbgt("mapblck: again", 0); + if(_finished_line()) { - _c4dbgpf("scanning block: set indentation from provisional: {}", provisional_indentation); - indentation = provisional_indentation; - } - - if(num_lines) - _line_ended_undo(); - - _c4dbgpf("scanning block: raw=~~~{}~~~", raw_block); - - // ok! 
now we strip the newlines and spaces according to the specs - s = _filter_block_scalar(raw_block, newline, chomp, indentation); - - _c4dbgpf("scanning block: final=~~~{}~~~", s); + _line_ended(); + _scan_line(); + if(_finished_file()) + { + _c4dbgp("mapblck: file finished!"); + _end_map_blck(); + goto mapblck_finish; + } + _c4dbgnextline(); + } + goto mapblck_start; - return s; + mapblck_finish: + _c4dbgp("mapblck: finish"); } //----------------------------------------------------------------------------- -template -bool Parser::_filter_nl(substr r, size_t *C4_RESTRICT i, size_t *C4_RESTRICT pos, size_t indentation) +template +void ParseEngine::_handle_unk_json() { - // a debugging scaffold: - #if 0 - #define _c4dbgfnl(fmt, ...) _c4dbgpf("filter_nl[{}]: " fmt, *i, __VA_ARGS__) - #else - #define _c4dbgfnl(...) - #endif + _c4dbgpf("handle_unk_json indref={} target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id); + + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT|RSEQ|RMAP)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RTOP)); - const char curr = r[*i]; - bool replaced = false; + _maybe_skip_comment(); + csubstr rem = m_evt_handler->m_curr->line_contents.rem; + if(!rem.len) + return; - _RYML_CB_ASSERT(m_stack.m_callbacks, indentation != npos); - _RYML_CB_ASSERT(m_stack.m_callbacks, curr == '\n'); + size_t pos = rem.first_not_of(" \t"); + if(pos) + { + pos = pos != npos ? pos : rem.len; + _c4dbgpf("skipping indentation of {}", pos); + _line_progressed(pos); + rem = m_evt_handler->m_curr->line_contents.rem; + if(!rem.len) + return; + _c4dbgpf("rem is now [{}]~~~{}~~~", rem.len, rem); + } - _c4dbgfnl("found newline. 
sofar=[{}]~~~{}~~~", *pos, m_filter_arena.first(*pos)); - size_t ii = *i; - size_t numnl_following = count_following_newlines(r, &ii, indentation); - if(numnl_following) + if(rem.begins_with('[')) + { + _c4dbgp("it's a seq"); + m_evt_handler->check_trailing_doc_token(); + _maybe_begin_doc(); + m_evt_handler->begin_seq_val_flow(); + addrem_flags(RSEQ|FLOW|RVAL, RUNK|RTOP|RDOC); + _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem)); + m_doc_empty = false; + _line_progressed(1); + } + else if(rem.begins_with('{')) { - _c4dbgfnl("{} consecutive (empty) lines {} in the middle. totalws={}", 1+numnl_following, ii < r.len ? "in the middle" : "at the end", ii - *i); - for(size_t j = 0; j < numnl_following; ++j) - m_filter_arena.str[(*pos)++] = '\n'; + _c4dbgp("it's a map"); + m_evt_handler->check_trailing_doc_token(); + _maybe_begin_doc(); + m_evt_handler->begin_map_val_flow(); + addrem_flags(RMAP|FLOW|RKEY, RVAL|RTOP|RUNK|RDOC); + m_doc_empty = false; + _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem)); + _line_progressed(1); } else { - if(r.first_not_of(" \t", *i+1) != npos) - { - m_filter_arena.str[(*pos)++] = ' '; - _c4dbgfnl("single newline. convert to space. ii={}/{}. sofar=[{}]~~~{}~~~", ii, r.len, *pos, m_filter_arena.first(*pos)); - replaced = true; + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! 
has_any(SSCL)); + _maybe_skip_whitespace_tokens(); + csubstr s = m_evt_handler->m_curr->line_contents.rem; + if(!s.len) + return; + const size_t startindent = m_evt_handler->m_curr->line_contents.indentation; // save + const char first = s.str[0]; + ScannedScalar sc; + if(first == '"') + { + _c4dbgp("runk_json: scanning double-quoted scalar"); + m_evt_handler->check_trailing_doc_token(); + _maybe_begin_doc(); + add_flags(RDOC); + m_doc_empty = false; + sc = _scan_scalar_dquot(); + csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc); + if(!_maybe_scan_following_colon()) + { + _c4dbgp("runk_json: set as val"); + _handle_annotations_before_blck_val_scalar(); + m_evt_handler->set_val_scalar_dquoted(maybe_filtered); + } + else + { + _c4err("parse error"); + } } - else + else if(_scan_scalar_plain_unk(&sc)) { - if C4_IF_CONSTEXPR (keep_trailing_whitespace) + _c4dbgp("runk_json: got a plain scalar"); + m_evt_handler->check_trailing_doc_token(); + _maybe_begin_doc(); + add_flags(RDOC); + m_doc_empty = false; + if(!_maybe_scan_following_colon()) { - m_filter_arena.str[(*pos)++] = ' '; - _c4dbgfnl("single newline. convert to space. ii={}/{}. sofar=[{}]~~~{}~~~", ii, r.len, *pos, m_filter_arena.first(*pos)); - replaced = true; + _c4dbgp("runk_json: set as val"); + _handle_annotations_before_blck_val_scalar(); + csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent); + m_evt_handler->set_val_scalar_plain(maybe_filtered); } else { - _c4dbgfnl("last newline, everything else is whitespace. ii={}/{}", ii, r.len); - *i = r.len; + _c4err("parse error"); } } - if C4_IF_CONSTEXPR (backslash_is_escape) + else { - if(ii < r.len && r.str[ii] == '\\') - { - const char next = ii+1 < r.len ? 
r.str[ii+1] : '\0'; - if(next == ' ' || next == '\t') - { - _c4dbgfnl("extend skip to backslash{}", ""); - ++ii; - } - } + _c4err("parse error"); } } - *i = ii - 1; // correct for the loop increment - - #undef _c4dbgfnl - - return replaced; } //----------------------------------------------------------------------------- -template -void Parser::_filter_ws(substr r, size_t *C4_RESTRICT i, size_t *C4_RESTRICT pos) +template +void ParseEngine::_handle_unk() { - // a debugging scaffold: - #if 0 - #define _c4dbgfws(fmt, ...) _c4dbgpf("filt_nl[{}]: " fmt, *i, __VA_ARGS__) - #else - #define _c4dbgfws(...) - #endif + _c4dbgpf("handle_unk indref={} target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id); + + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT|RSEQ|RMAP)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RTOP)); - const char curr = r[*i]; - _c4dbgfws("found whitespace '{}'", _c4prc(curr)); - _RYML_CB_ASSERT(m_stack.m_callbacks, curr == ' ' || curr == '\t'); + _maybe_skip_comment(); + csubstr rem = m_evt_handler->m_curr->line_contents.rem; + if(!rem.len) + return; + + size_t pos = rem.first_not_of(" \t"); + if(pos) + { + pos = pos != npos ? pos : rem.len; + _c4dbgpf("skipping {} whitespace characters", pos); + _line_progressed(pos); + rem = m_evt_handler->m_curr->line_contents.rem; + if(!rem.len) + return; + _c4dbgpf("rem is now [{}]~~~{}~~~", rem.len, rem); + } - size_t first = *i > 0 ? 
r.first_not_of(" \t", *i) : r.first_not_of(' ', *i); - if(first != npos) + if(m_evt_handler->m_curr->line_contents.indentation == 0u && _at_line_begin()) { - if(r[first] == '\n' || r[first] == '\r') // skip trailing whitespace + const char first = rem.str[0]; + _c4dbgp("rtop: zero indent + at line begin"); + if(first == '-') + { + _c4dbgp("rtop: suspecting doc"); + if(_is_doc_begin_token(rem)) + { + _c4dbgp("rtop: begin doc"); + _maybe_end_doc(); + _begin2_doc_expl(); + _set_indentation(0); + addrem_flags(RDOC|RUNK, NDOC); + _line_progressed(3u); + _maybe_skip_whitespace_tokens(); + return; + } + } + else if(first == '.') { - _c4dbgfws("whitespace is trailing on line. firstnonws='{}'@{}", _c4prc(r[first]), first); - *i = first - 1; // correct for the loop increment + _c4dbgp("rtop: suspecting doc end"); + if(_is_doc_end_token(rem)) + { + _c4dbgp("rtop: end doc"); + if(has_any(RDOC)) + { + _end2_doc_expl(); + } + else + { + _c4dbgp("rtop: ignore end doc"); + } + addrem_flags(NDOC|RUNK, RDOC); + _line_progressed(3u); + _maybe_skip_whitespace_tokens(); + return; + } } - else // a legit whitespace + else if(first == '%') { - m_filter_arena.str[(*pos)++] = curr; - _c4dbgfws("legit whitespace. sofar=[{}]~~~{}~~~", *pos, m_filter_arena.first(*pos)); + _c4dbgpf("directive: {}", rem); + if(C4_UNLIKELY(!m_doc_empty && has_none(NDOC))) + _RYML_CB_ERR(m_evt_handler->m_stack.m_callbacks, "need document footer before directives"); + _handle_directive(rem); + return; } } - else - { - _c4dbgfws("... everything else is trailing whitespace{}", ""); - if C4_IF_CONSTEXPR (keep_trailing_whitespace) - for(size_t j = *i; j < r.len; ++j) - m_filter_arena.str[(*pos)++] = r[j]; - *i = r.len; - } - - #undef _c4dbgfws -} - - -//----------------------------------------------------------------------------- -csubstr Parser::_filter_plain_scalar(substr s, size_t indentation) -{ - // a debugging scaffold: - #if 0 - #define _c4dbgfps(...) 
_c4dbgpf("filt_plain_scalar" __VA_ARGS__) - #else - #define _c4dbgfps(...) - #endif - _c4dbgfps("before=~~~{}~~~", s); + /* no else-if! */ + char first = rem.str[0]; - substr r = s.triml(" \t"); - _grow_filter_arena(r.len); - size_t pos = 0; // the filtered size - bool filtered_chars = false; - for(size_t i = 0; i < r.len; ++i) + if(first == '[') { - const char curr = r.str[i]; - _c4dbgfps("[{}]: '{}'", i, _c4prc(curr)); - if(curr == ' ' || curr == '\t') + m_evt_handler->check_trailing_doc_token(); + _maybe_begin_doc(); + m_doc_empty = false; + const size_t startindent = m_evt_handler->m_curr->line_contents.current_col(rem); + if(C4_LIKELY( ! _annotations_require_key_container())) { - _filter_ws(r, &i, &pos); + _c4dbgp("it's a seq, flow"); + _handle_annotations_before_blck_val_scalar(); + m_evt_handler->begin_seq_val_flow(); + addrem_flags(RSEQ|FLOW|RVAL, RUNK|RTOP|RDOC); + _set_indentation(startindent); } - else if(curr == '\n') + else { - filtered_chars = _filter_nl(r, &i, &pos, indentation); + _c4dbgp("start new block map, set flow seq as key (!)"); + _handle_annotations_before_start_mapblck(m_evt_handler->m_curr->pos.line); + m_evt_handler->begin_map_val_block(); + addrem_flags(RMAP|BLCK|RKCL, RUNK|RTOP|RDOC); + _handle_annotations_and_indentation_after_start_mapblck(startindent, m_evt_handler->m_curr->pos.line); + m_evt_handler->begin_seq_key_flow(); + addrem_flags(RSEQ|FLOW|RVAL, RMAP|BLCK|RKCL); + _set_indentation(startindent); } - else if(curr == '\r') // skip \r --- https://stackoverflow.com/questions/1885900 + _line_progressed(1); + } + else if(first == '{') + { + m_evt_handler->check_trailing_doc_token(); + _maybe_begin_doc(); + m_doc_empty = false; + const size_t startindent = m_evt_handler->m_curr->line_contents.current_col(rem); + if(C4_LIKELY( ! 
_annotations_require_key_container())) { - ; + _c4dbgp("it's a map, flow"); + _handle_annotations_before_blck_val_scalar(); + m_evt_handler->begin_map_val_flow(); + addrem_flags(RMAP|FLOW|RKEY, RVAL|RTOP|RUNK|RDOC); + _set_indentation(startindent); } else { - m_filter_arena.str[pos++] = r[i]; + _c4dbgp("start new block map, set flow map as key (!)"); + _handle_annotations_before_start_mapblck(m_evt_handler->m_curr->pos.line); + m_evt_handler->begin_map_val_block(); + addrem_flags(RMAP|BLCK|RKCL, RUNK|RTOP|RDOC); + _handle_annotations_and_indentation_after_start_mapblck(startindent, m_evt_handler->m_curr->pos.line); + m_evt_handler->begin_map_key_flow(); + addrem_flags(RMAP|FLOW|RKEY, BLCK|RKCL); + _set_indentation(startindent); } + _line_progressed(1); } - - _RYML_CB_ASSERT(m_stack.m_callbacks, pos <= m_filter_arena.len); - if(pos < r.len || filtered_chars) + else if(first == '-' && _is_blck_token(rem)) { - r = _finish_filter_arena(r, pos); + _c4dbgp("it's a seq, block"); + m_evt_handler->check_trailing_doc_token(); + _maybe_begin_doc(); + _handle_annotations_before_blck_val_scalar(); + m_evt_handler->begin_seq_val_block(); + addrem_flags(RSEQ|BLCK|RVAL, RNXT|RTOP|RUNK|RDOC); + m_doc_empty = false; + _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem)); + _line_progressed(1); + _maybe_skip_whitespace_tokens(); + } + else if(first == '?' 
&& _is_blck_token(rem)) + { + _c4dbgp("it's a map + this key is complex"); + m_evt_handler->check_trailing_doc_token(); + _maybe_begin_doc(); + _handle_annotations_before_blck_val_scalar(); + m_evt_handler->begin_map_val_block(); + addrem_flags(RMAP|BLCK|QMRK, RKEY|RVAL|RTOP|RUNK); + m_doc_empty = false; + m_was_inside_qmrk = true; + _save_indentation(); + _line_progressed(1); + _maybe_skip_whitespace_tokens(); } - - _RYML_CB_ASSERT(m_stack.m_callbacks, s.len >= r.len); - _c4dbgfps("#filteredchars={} after=~~~{}~~~", s.len - r.len, r); - - #undef _c4dbgfps - return r; -} - - -//----------------------------------------------------------------------------- -csubstr Parser::_filter_squot_scalar(substr s) -{ - // a debugging scaffold: - #if 0 - #define _c4dbgfsq(...) _c4dbgpf("filt_squo_scalar") - #else - #define _c4dbgfsq(...) - #endif - - // from the YAML spec for double-quoted scalars: - // https://yaml.org/spec/1.2-old/spec.html#style/flow/single-quoted - - _c4dbgfsq(": before=~~~{}~~~", s); - - _grow_filter_arena(s.len); - substr r = s; - size_t pos = 0; // the filtered size - bool filtered_chars = false; - for(size_t i = 0; i < r.len; ++i) + else if(first == ':' && _is_blck_token(rem)) { - const char curr = r[i]; - _c4dbgfsq("[{}]: '{}'", i, _c4prc(curr)); - if(curr == ' ' || curr == '\t') + if(m_doc_empty) { - _filter_ws(r, &i, &pos); + _c4dbgp("it's a map with an empty key"); + m_evt_handler->check_trailing_doc_token(); + _maybe_begin_doc(); + _handle_annotations_before_blck_val_scalar(); + m_evt_handler->begin_map_val_block(); + m_evt_handler->set_key_scalar_plain({}); + m_doc_empty = false; + _save_indentation(); } - else if(curr == '\n') + else { - filtered_chars = _filter_nl(r, &i, &pos, /*indentation*/0); + _c4dbgp("actually prev val is a key!"); + size_t prev_indentation = m_evt_handler->m_curr->indref; + m_evt_handler->actually_val_is_first_key_of_new_map_block(); + _set_indentation(prev_indentation); } - else if(curr == '\r') // skip \r --- 
https://stackoverflow.com/questions/1885900 + addrem_flags(RMAP|BLCK|RVAL, RTOP|RUNK|RDOC); + _line_progressed(1); + _maybe_skip_whitespace_tokens(); + } + else if(first == '&') + { + csubstr anchor = _scan_anchor(); + _c4dbgpf("anchor! [{}]~~~{}~~~", anchor.len, anchor); + m_evt_handler->check_trailing_doc_token(); + _maybe_begin_doc(); + const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem); + const size_t line = m_evt_handler->m_curr->pos.line; + _add_annotation(&m_pending_anchors, anchor, indentation, line); + _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem)); + m_doc_empty = false; + } + else if(first == '*') + { + csubstr ref = _scan_ref_map(); + _c4dbgpf("ref! [{}]~~~{}~~~", ref.len, ref); + m_evt_handler->check_trailing_doc_token(); + _maybe_begin_doc(); + m_doc_empty = false; + if(!_maybe_scan_following_colon()) { - ; + _c4dbgp("runk: set val ref"); + _handle_annotations_before_blck_val_scalar(); + m_evt_handler->set_val_ref(ref); } - else if(curr == '\'') + else { - char next = i+1 < r.len ? r[i+1] : '\0'; - if(next == '\'') + _c4dbgp("runk: start new block map, set ref as key"); + const size_t startindent = m_evt_handler->m_curr->line_contents.indentation; // save + const size_t startline = m_evt_handler->m_curr->pos.line; // save + _handle_annotations_before_start_mapblck(startline); + m_evt_handler->begin_map_val_block(); + _handle_annotations_and_indentation_after_start_mapblck(startindent, startline); + m_evt_handler->set_key_ref(ref); + _maybe_skip_whitespace_tokens(); + _set_indentation(startindent); + addrem_flags(RMAP|BLCK|RVAL, RTOP|RUNK|RDOC); + } + } + else if(first == '!') + { + csubstr tag = _scan_tag(); + _c4dbgpf("unk: val tag! 
[{}]~~~{}~~~", tag.len, tag); + // we need to buffer the tags, as there may be two + // consecutive tags in here + const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem); + const size_t line = m_evt_handler->m_curr->pos.line; + _add_annotation(&m_pending_tags, tag, indentation, line); + } + else + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(SSCL)); + _maybe_skip_whitespace_tokens(); + csubstr s = m_evt_handler->m_curr->line_contents.rem; + if(!s.len) + return; + const size_t startindent = m_evt_handler->m_curr->line_contents.indentation; // save + const size_t startline = m_evt_handler->m_curr->pos.line; // save + first = s.str[0]; + ScannedScalar sc; + if(first == '\'') + { + _c4dbgp("runk: scanning single-quoted scalar"); + m_evt_handler->check_trailing_doc_token(); + _maybe_begin_doc(); + add_flags(RDOC); + m_doc_empty = false; + sc = _scan_scalar_squot(); + if(!_maybe_scan_following_colon()) + { + _c4dbgp("runk: set as val"); + _handle_annotations_before_blck_val_scalar(); + csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc); + m_evt_handler->set_val_scalar_squoted(maybe_filtered); + } + else { - _c4dbgfsq("[{}]: two consecutive quotes", i); - filtered_chars = true; - m_filter_arena.str[pos++] = '\''; - ++i; + _c4dbgp("runk: start new block map, set scalar as key"); + _handle_annotations_before_start_mapblck(startline); + m_evt_handler->begin_map_val_block(); + _handle_annotations_and_indentation_after_start_mapblck(startindent, startline); + csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); + m_evt_handler->set_key_scalar_squoted(maybe_filtered); + _maybe_skip_whitespace_tokens(); + _set_indentation(startindent); + addrem_flags(RMAP|BLCK|RVAL, RTOP|RUNK|RDOC); } } - else + else if(first == '"') + { + _c4dbgp("runk: scanning double-quoted scalar"); + m_evt_handler->check_trailing_doc_token(); + _maybe_begin_doc(); + add_flags(RDOC); + m_doc_empty = false; + sc = _scan_scalar_dquot(); + 
if(!_maybe_scan_following_colon()) + { + _c4dbgp("runk: set as val"); + _handle_annotations_before_blck_val_scalar(); + csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc); + m_evt_handler->set_val_scalar_dquoted(maybe_filtered); + } + else + { + _c4dbgp("runk: start new block map, set double-quoted scalar as key"); + _handle_annotations_before_start_mapblck(startline); + m_evt_handler->begin_map_val_block(); + _handle_annotations_and_indentation_after_start_mapblck(startindent, startline); + csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); + m_evt_handler->set_key_scalar_dquoted(maybe_filtered); + _maybe_skip_whitespace_tokens(); + _set_indentation(startindent); + addrem_flags(RMAP|BLCK|RVAL, RTOP|RUNK|RDOC); + } + } + else if(first == '|') + { + _c4dbgp("runk: scanning block-literal scalar"); + m_evt_handler->check_trailing_doc_token(); + _maybe_begin_doc(); + add_flags(RDOC); + m_doc_empty = false; + ScannedBlock sb; + _scan_block(&sb, startindent); + if(C4_LIKELY(!_maybe_scan_following_colon())) + { + _c4dbgp("runk: set as val"); + _handle_annotations_before_blck_val_scalar(); + csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb); + m_evt_handler->set_val_scalar_literal(maybe_filtered); + } + else + { + _c4err("block literal keys must be enclosed in '?'"); + } + } + else if(first == '>') { - m_filter_arena.str[pos++] = curr; + _c4dbgp("runk: scanning block-folded scalar"); + m_evt_handler->check_trailing_doc_token(); + _maybe_begin_doc(); + add_flags(RDOC); + m_doc_empty = false; + ScannedBlock sb; + _scan_block(&sb, startindent); + if(C4_LIKELY(!_maybe_scan_following_colon())) + { + _c4dbgp("runk: set as val"); + _handle_annotations_before_blck_val_scalar(); + csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb); + m_evt_handler->set_val_scalar_folded(maybe_filtered); + } + else + { + _c4err("block folded keys must be enclosed in '?'"); + } + } + else if(_scan_scalar_plain_unk(&sc)) + { + _c4dbgp("runk: got a plain 
scalar"); + m_evt_handler->check_trailing_doc_token(); + _maybe_begin_doc(); + add_flags(RDOC); + m_doc_empty = false; + if(!_maybe_scan_following_colon()) + { + _c4dbgp("runk: set as val"); + _handle_annotations_before_blck_val_scalar(); + csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent); + m_evt_handler->set_val_scalar_plain(maybe_filtered); + } + else + { + _c4dbgp("runk: start new block map, set scalar as key"); + _handle_annotations_before_start_mapblck(startline); + m_evt_handler->begin_map_val_block(); + _handle_annotations_and_indentation_after_start_mapblck(startindent, startline); + csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent); + m_evt_handler->set_key_scalar_plain(maybe_filtered); + _maybe_skip_whitespace_tokens(); + _set_indentation(startindent); + addrem_flags(RMAP|BLCK|RVAL, RTOP|RUNK|RDOC); + } } } - - _RYML_CB_ASSERT(m_stack.m_callbacks, pos <= m_filter_arena.len); - if(pos < r.len || filtered_chars) - { - r = _finish_filter_arena(r, pos); - } - - _RYML_CB_ASSERT(m_stack.m_callbacks, s.len >= r.len); - _c4dbgpf(": #filteredchars={} after=~~~{}~~~", s.len - r.len, r); - - #undef _c4dbgfsq - return r; } //----------------------------------------------------------------------------- -csubstr Parser::_filter_dquot_scalar(substr s) + +template +C4_COLD void ParseEngine::_handle_usty() { - // a debugging scaffold: - #if 0 - #define _c4dbgfdq(...) _c4dbgpf("filt_dquo_scalar" __VA_ARGS__) - #else - #define _c4dbgfdq(...) 
+ _c4dbgpf("handle_usty target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id); + + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(BLCK|FLOW)); + + #ifdef RYML_NO_COVERAGE__TO_BE_DELETED + if(has_any(RNXT)) + { + _c4dbgp("usty[RNXT]: finishing!"); + _end_stream(); + } #endif - _c4dbgfdq(": before=~~~{}~~~", s); + _maybe_skip_comment(); + csubstr rem = m_evt_handler->m_curr->line_contents.rem; + if(!rem.len) + return; - // from the YAML spec for double-quoted scalars: - // https://yaml.org/spec/1.2-old/spec.html#style/flow/double-quoted - // - // All leading and trailing white space characters are excluded - // from the content. Each continuation line must therefore contain - // at least one non-space character. Empty lines, if any, are - // consumed as part of the line folding. + size_t pos = rem.first_not_of(" \t"); + if(pos) + { + pos = pos != npos ? pos : rem.len; + _c4dbgpf("skipping indentation of {}", pos); + _line_progressed(pos); + rem = m_evt_handler->m_curr->line_contents.rem; + if(!rem.len) + return; + _c4dbgpf("rem is now [{}]~~~{}~~~", rem.len, rem); + } - _grow_filter_arena(s.len + 2u * s.count('\\')); - substr r = s; - size_t pos = 0; // the filtered size - bool filtered_chars = false; - for(size_t i = 0; i < r.len; ++i) + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.len > 0); + size_t startindent = m_evt_handler->m_curr->line_contents.indentation; // save + char first = rem.str[0]; + if(has_any(RSEQ)) // destination is a sequence { - const char curr = r[i]; - _c4dbgfdq("[{}]: '{}'", i, _c4prc(curr)); - if(curr == ' ' || curr == '\t') + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(RMAP)); + _c4dbgpf("usty[RSEQ]: first='{}'", _c4prc(first)); + if(first == '[') { - _filter_ws(r, &i, &pos); + _c4dbgp("usty[RSEQ]: it's a flow seq. 
merging it"); + add_flags(RNXT); + m_evt_handler->_push(); + addrem_flags(FLOW|RVAL, RNXT|USTY); + _set_indentation(startindent); + _line_progressed(1); + _maybe_skip_whitespace_tokens(); } - else if(curr == '\n') + else if(first == '-' && _is_blck_token(rem)) { - filtered_chars = _filter_nl(r, &i, &pos, /*indentation*/0); + _c4dbgp("usty[RSEQ]: it's a block seq. merging it"); + add_flags(RNXT); + m_evt_handler->_push(); + addrem_flags(BLCK|RVAL, RNXT|USTY); + _set_indentation(startindent); + _line_progressed(1); + _maybe_skip_whitespace_tokens(); } - else if(curr == '\r') // skip \r --- https://stackoverflow.com/questions/1885900 + else { - ; + _c4err("can only parse a seq into an existing seq"); } - else if(curr == '\\') + } + else if(has_any(RMAP)) // destination is a map + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(RSEQ)); + _c4dbgpf("usty[RMAP]: first='{}'", _c4prc(first)); + if(first == '{') { - char next = i+1 < r.len ? r[i+1] : '\0'; - _c4dbgfdq("[{}]: backslash, next='{}'", i, _c4prc(next)); - filtered_chars = true; - if(next == '\r') - { - if(i+2 < r.len && r[i+2] == '\n') - { - ++i; // newline escaped with \ -- skip both (add only one as i is loop-incremented) - next = '\n'; - _c4dbgfdq("[{}]: was \\r\\n, now next='\\n'", i); - } - } - // remember the loop will also increment i - if(next == '\n') + _c4dbgp("usty[RMAP]: it's a flow map. merging it"); + add_flags(RNXT); + _handle_annotations_before_blck_val_scalar(); + m_evt_handler->_push(); + addrem_flags(RMAP|FLOW|RKEY, RNXT|USTY); + _set_indentation(startindent); + _line_progressed(1); + _maybe_skip_whitespace_tokens(); + } + else if(first == '?' 
&& _is_blck_token(rem)) + { + _c4dbgp("usty[RMAP]: it's a block map + this key is complex"); + add_flags(RNXT); + _handle_annotations_before_blck_val_scalar(); + m_evt_handler->_push(); + addrem_flags(RMAP|BLCK|QMRK, RNXT|USTY); + m_was_inside_qmrk = true; + _save_indentation(); + _line_progressed(1); + _maybe_skip_whitespace_tokens(); + } + else if(first == ':' && _is_blck_token(rem)) + { + _c4dbgp("usty[RMAP]: it's a map with an empty key"); + add_flags(RNXT); + _handle_annotations_before_blck_val_scalar(); + m_evt_handler->_push(); + m_evt_handler->set_key_scalar_plain({}); + addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY); + _save_indentation(); + _line_progressed(1); + _maybe_skip_whitespace_tokens(); + } + else if(rem.begins_with('&')) + { + csubstr anchor = _scan_anchor(); + _c4dbgpf("usty[RMAP]: anchor! [{}]~~~{}~~~", anchor.len, anchor); + const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem); + const size_t line = m_evt_handler->m_curr->pos.line; + _add_annotation(&m_pending_anchors, anchor, indentation, line); + _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem)); + } + else if(first == '*') + { + csubstr ref = _scan_ref_map(); + _c4dbgpf("usty[RMAP]: ref! 
[{}]~~~{}~~~", ref.len, ref); + if(!_maybe_scan_following_colon()) { - size_t ii = i + 2; - for( ; ii < r.len; ++ii) - { - if(r.str[ii] == ' ' || r.str[ii] == '\t') // skip leading whitespace - ; - else - break; - } - i += ii - i - 1; + _c4err("cannot read a VAL to a map"); } - else if(next == '"' || next == '/' || next == ' ' || next == '\t') // escapes for json compatibility + else { - m_filter_arena.str[pos++] = next; - ++i; + _c4dbgp("usty[RMAP]: start new block map, set ref as key"); + const size_t startline = m_evt_handler->m_curr->pos.line; // save + add_flags(RNXT); + _handle_annotations_before_start_mapblck(startline); + m_evt_handler->_push(); + _handle_annotations_and_indentation_after_start_mapblck(startindent, startline); + m_evt_handler->set_key_ref(ref); + _maybe_skip_whitespace_tokens(); + _set_indentation(startindent); + addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY); } - else if(next == '\r') + } + else if(first == '!') + { + csubstr tag = _scan_tag(); + _c4dbgpf("usty[RMAP]: val tag! [{}]~~~{}~~~", tag.len, tag); + // we need to buffer the tags, as there may be two + // consecutive tags in here + const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem); + const size_t line = m_evt_handler->m_curr->pos.line; + _add_annotation(&m_pending_tags, tag, indentation, line); + } + else if(first == '[' || (first == '-' && _is_blck_token(rem))) + { + _c4err("cannot parse a seq into an existing map"); + } + else + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(SSCL)); + startindent = m_evt_handler->m_curr->line_contents.indentation; // save + const size_t startline = m_evt_handler->m_curr->pos.line; // save + ScannedScalar sc; + _c4dbgpf("usty[RMAP]: maybe scalar. 
first='{}'", _c4prc(first)); + if(first == '\'') { - //++i; + _c4dbgp("usty[RMAP]: scanning single-quoted scalar"); + sc = _scan_scalar_squot(); + if(!_maybe_scan_following_colon()) + { + _c4err("cannot read a VAL to a map"); + } + else + { + _c4dbgp("usty[RMAP]: start new block map, set scalar as key"); + add_flags(RNXT); + _handle_annotations_before_start_mapblck(startline); + m_evt_handler->_push(); + _handle_annotations_and_indentation_after_start_mapblck(startindent, startline); + csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); + m_evt_handler->set_key_scalar_squoted(maybe_filtered); + _set_indentation(startindent); + addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY); + _maybe_skip_whitespace_tokens(); + } } - else if(next == 'n') + else if(first == '"') { - m_filter_arena.str[pos++] = '\n'; - ++i; + _c4dbgp("usty[RMAP]: scanning double-quoted scalar"); + sc = _scan_scalar_dquot(); + if(!_maybe_scan_following_colon()) + { + _c4err("cannot read a VAL to a map"); + } + else + { + _c4dbgp("usty[RMAP]: start new block map, set double-quoted scalar as key"); + add_flags(RNXT); + _handle_annotations_before_start_mapblck(startline); + m_evt_handler->_push(); + _handle_annotations_and_indentation_after_start_mapblck(startindent, startline); + csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); + m_evt_handler->set_key_scalar_dquoted(maybe_filtered); + _set_indentation(startindent); + addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY); + _maybe_skip_whitespace_tokens(); + } } - else if(next == 'r') + else if(first == '|') { - m_filter_arena.str[pos++] = '\r'; - ++i; // skip + _c4err("block literal keys must be enclosed in '?'"); } - else if(next == 't') + else if(first == '>') { - m_filter_arena.str[pos++] = '\t'; - ++i; + _c4err("block literal keys must be enclosed in '?'"); } - else if(next == '\\') + else if(_scan_scalar_plain_unk(&sc)) { - m_filter_arena.str[pos++] = '\\'; - ++i; - } - else if(next == 'x') // UTF8 - { - if(i + 1u + 2u >= r.len) - _c4err("\\x 
requires 2 hex digits"); - uint8_t byteval = {}; - if(!read_hex(r.sub(i + 2u, 2u), &byteval)) - _c4err("failed to read \\x codepoint"); - m_filter_arena.str[pos++] = *(char*)&byteval; - i += 1u + 2u; - } - else if(next == 'u') // UTF16 - { - if(i + 1u + 4u >= r.len) - _c4err("\\u requires 4 hex digits"); - char readbuf[8]; - csubstr codepoint = r.sub(i + 2u, 4u); - uint32_t codepoint_val = {}; - if(!read_hex(codepoint, &codepoint_val)) - _c4err("failed to parse \\u codepoint"); - size_t numbytes = decode_code_point((uint8_t*)readbuf, sizeof(readbuf), codepoint_val); - C4_ASSERT(numbytes <= 4); - memcpy(m_filter_arena.str + pos, readbuf, numbytes); - pos += numbytes; - i += 1u + 4u; - } - else if(next == 'U') // UTF32 - { - if(i + 1u + 8u >= r.len) - _c4err("\\U requires 8 hex digits"); - char readbuf[8]; - csubstr codepoint = r.sub(i + 2u, 8u); - uint32_t codepoint_val = {}; - if(!read_hex(codepoint, &codepoint_val)) - _c4err("failed to parse \\U codepoint"); - size_t numbytes = decode_code_point((uint8_t*)readbuf, sizeof(readbuf), codepoint_val); - C4_ASSERT(numbytes <= 4); - memcpy(m_filter_arena.str + pos, readbuf, numbytes); - pos += numbytes; - i += 1u + 8u; - } - // https://yaml.org/spec/1.2.2/#rule-c-ns-esc-char - else if(next == '0') - { - m_filter_arena.str[pos++] = '\0'; - ++i; + _c4dbgp("usty[RMAP]: got a plain scalar"); + if(!_maybe_scan_following_colon()) + { + _c4err("cannot read a VAL to a map"); + } + else + { + _c4dbgp("usty[RMAP]: start new block map, set scalar as key"); + add_flags(RNXT); + _handle_annotations_before_start_mapblck(startline); + m_evt_handler->_push(); + _handle_annotations_and_indentation_after_start_mapblck(startindent, startline); + csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent); + m_evt_handler->set_key_scalar_plain(maybe_filtered); + _set_indentation(startindent); + addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY); + _maybe_skip_whitespace_tokens(); + } } - else if(next == 'b') // backspace + else { - 
m_filter_arena.str[pos++] = '\b'; - ++i; + _c4err("parse error"); } - else if(next == 'f') // form feed + } + } + else // destination is unknown + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(RSEQ)); + _c4dbgpf("usty[UNK]: first='{}'", _c4prc(first)); + if(first == '[') + { + _c4dbgp("usty[UNK]: it's a flow seq"); + add_flags(RNXT); + _handle_annotations_before_blck_val_scalar(); + m_evt_handler->begin_seq_val_flow(); + addrem_flags(RSEQ|FLOW|RVAL, RNXT|USTY); + _set_indentation(startindent); + _line_progressed(1); + _maybe_skip_whitespace_tokens(); + } + else if(first == '-' && _is_blck_token(rem)) + { + _c4dbgp("usty[UNK]: it's a block seq"); + add_flags(RNXT); + _handle_annotations_before_blck_val_scalar(); + m_evt_handler->begin_seq_val_block(); + addrem_flags(RSEQ|BLCK|RVAL, RNXT|USTY); + _set_indentation(startindent); + _line_progressed(1); + _maybe_skip_whitespace_tokens(); + } + else if(first == '{') + { + _c4dbgp("usty[UNK]: it's a flow map"); + add_flags(RNXT); + _handle_annotations_before_blck_val_scalar(); + m_evt_handler->begin_map_val_flow(); + addrem_flags(RMAP|FLOW|RKEY, RNXT|USTY); + _set_indentation(startindent); + _line_progressed(1); + _maybe_skip_whitespace_tokens(); + } + else if(first == '?' 
&& _is_blck_token(rem)) + { + _c4dbgp("usty[UNK]: it's a map + this key is complex"); + add_flags(RNXT); + _handle_annotations_before_blck_val_scalar(); + m_evt_handler->begin_map_val_block(); + addrem_flags(RMAP|BLCK|QMRK, RNXT|USTY); + m_was_inside_qmrk = true; + _save_indentation(); + _line_progressed(1); + _maybe_skip_whitespace_tokens(); + } + else if(first == ':' && _is_blck_token(rem)) + { + _c4dbgp("usty[UNK]: it's a map with an empty key"); + add_flags(RNXT); + _handle_annotations_before_blck_val_scalar(); + m_evt_handler->begin_map_val_block(); + m_evt_handler->set_key_scalar_plain({}); + addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY); + _save_indentation(); + _line_progressed(1); + _maybe_skip_whitespace_tokens(); + } + else if(first == '&') + { + csubstr anchor = _scan_anchor(); + _c4dbgpf("usty[UNK]: anchor! [{}]~~~{}~~~", anchor.len, anchor); + const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem); + const size_t line = m_evt_handler->m_curr->pos.line; + _add_annotation(&m_pending_anchors, anchor, indentation, line); + _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem)); + } + else if(first == '*') + { + csubstr ref = _scan_ref_map(); + _c4dbgpf("usty[UNK]: ref! 
[{}]~~~{}~~~", ref.len, ref); + if(!_maybe_scan_following_colon()) { - m_filter_arena.str[pos++] = '\f'; - ++i; + _c4dbgp("usty[UNK]: set val ref"); + _handle_annotations_before_blck_val_scalar(); + m_evt_handler->set_val_ref(ref); } - else if(next == 'a') // bell character + else { - m_filter_arena.str[pos++] = '\a'; - ++i; + _c4dbgp("usty[UNK]: start new block map, set ref as key"); + const size_t startline = m_evt_handler->m_curr->pos.line; // save + add_flags(RNXT); + _handle_annotations_before_start_mapblck(startline); + m_evt_handler->begin_map_val_block(); + _handle_annotations_and_indentation_after_start_mapblck(startindent, startline); + m_evt_handler->set_key_ref(ref); + _maybe_skip_whitespace_tokens(); + _set_indentation(startindent); + addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY); } - else if(next == 'v') // vertical tab - { - m_filter_arena.str[pos++] = '\v'; - ++i; + } + else if(first == '!') + { + csubstr tag = _scan_tag(); + _c4dbgpf("usty[UNK]: val tag! [{}]~~~{}~~~", tag.len, tag); + // we need to buffer the tags, as there may be two + // consecutive tags in here + const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem); + const size_t line = m_evt_handler->m_curr->pos.line; + _add_annotation(&m_pending_tags, tag, indentation, line); + } + else + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(SSCL)); + startindent = m_evt_handler->m_curr->line_contents.indentation; // save + const size_t startline = m_evt_handler->m_curr->pos.line; // save + first = rem.str[0]; + ScannedScalar sc; + _c4dbgpf("usty[UNK]: maybe scalar. 
first='{}'", _c4prc(first)); + if(first == '\'') + { + _c4dbgp("usty[UNK]: scanning single-quoted scalar"); + sc = _scan_scalar_squot(); + if(!_maybe_scan_following_colon()) + { + _c4dbgp("usty[UNK]: set as val"); + _handle_annotations_before_blck_val_scalar(); + csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc); + m_evt_handler->set_val_scalar_squoted(maybe_filtered); + _end_stream(); + } + else + { + _c4dbgp("usty[UNK]: start new block map, set scalar as key"); + add_flags(RNXT); + _handle_annotations_before_start_mapblck(startline); + m_evt_handler->begin_map_val_block(); + _handle_annotations_and_indentation_after_start_mapblck(startindent, startline); + csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); + m_evt_handler->set_key_scalar_squoted(maybe_filtered); + _set_indentation(startindent); + addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY); + _maybe_skip_whitespace_tokens(); + } } - else if(next == 'e') // escape character + else if(first == '"') { - m_filter_arena.str[pos++] = '\x1b'; - ++i; + _c4dbgp("usty[UNK]: scanning double-quoted scalar"); + sc = _scan_scalar_dquot(); + if(!_maybe_scan_following_colon()) + { + _c4dbgp("usty[UNK]: set as val"); + _handle_annotations_before_blck_val_scalar(); + csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc); + m_evt_handler->set_val_scalar_dquoted(maybe_filtered); + _end_stream(); + } + else + { + _c4dbgp("usty[UNK]: start new block map, set double-quoted scalar as key"); + add_flags(RNXT); + _handle_annotations_before_start_mapblck(startline); + m_evt_handler->begin_map_val_block(); + _handle_annotations_and_indentation_after_start_mapblck(startindent, startline); + csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); + m_evt_handler->set_key_scalar_dquoted(maybe_filtered); + _set_indentation(startindent); + addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY); + _maybe_skip_whitespace_tokens(); + } } - else if(next == '_') // unicode non breaking space \u00a0 + else if(first == '|') { - // 
https://www.compart.com/en/unicode/U+00a0 - m_filter_arena.str[pos++] = _RYML_CHCONST(-0x3e, 0xc2); - m_filter_arena.str[pos++] = _RYML_CHCONST(-0x60, 0xa0); - ++i; + _c4dbgp("usty[UNK]: scanning block-literal scalar"); + ScannedBlock sb; + _scan_block(&sb, startindent); + _c4dbgp("usty[UNK]: set as val"); + _handle_annotations_before_blck_val_scalar(); + csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb); + m_evt_handler->set_val_scalar_literal(maybe_filtered); + _end_stream(); } - else if(next == 'N') // unicode next line \u0085 + else if(first == '>') { - // https://www.compart.com/en/unicode/U+0085 - m_filter_arena.str[pos++] = _RYML_CHCONST(-0x3e, 0xc2); - m_filter_arena.str[pos++] = _RYML_CHCONST(-0x7b, 0x85); - ++i; + _c4dbgp("usty[UNK]: scanning block-folded scalar"); + ScannedBlock sb; + _scan_block(&sb, startindent); + _c4dbgp("usty[UNK]: set as val"); + _handle_annotations_before_blck_val_scalar(); + csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb); + m_evt_handler->set_val_scalar_folded(maybe_filtered); + _end_stream(); } - else if(next == 'L') // unicode line separator \u2028 + else if(_scan_scalar_plain_unk(&sc)) { - // https://www.utf8-chartable.de/unicode-utf8-table.pl?start=8192&number=1024&names=-&utf8=0x&unicodeinhtml=hex - m_filter_arena.str[pos++] = _RYML_CHCONST(-0x1e, 0xe2); - m_filter_arena.str[pos++] = _RYML_CHCONST(-0x80, 0x80); - m_filter_arena.str[pos++] = _RYML_CHCONST(-0x58, 0xa8); - ++i; + _c4dbgp("usty[UNK]: got a plain scalar"); + if(!_maybe_scan_following_colon()) + { + _c4dbgp("usty[UNK]: set as val"); + _handle_annotations_before_blck_val_scalar(); + csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent); + m_evt_handler->set_val_scalar_plain(maybe_filtered); + _end_stream(); + } + else + { + _c4dbgp("usty[UNK]: start new block map, set scalar as key"); + add_flags(RNXT); + _handle_annotations_before_start_mapblck(startline); + m_evt_handler->begin_map_val_block(); + 
_handle_annotations_and_indentation_after_start_mapblck(startindent, startline); + csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent); + m_evt_handler->set_key_scalar_plain(maybe_filtered); + _set_indentation(startindent); + addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY); + _maybe_skip_whitespace_tokens(); + } } - else if(next == 'P') // unicode paragraph separator \u2029 + else { - // https://www.utf8-chartable.de/unicode-utf8-table.pl?start=8192&number=1024&names=-&utf8=0x&unicodeinhtml=hex - m_filter_arena.str[pos++] = _RYML_CHCONST(-0x1e, 0xe2); - m_filter_arena.str[pos++] = _RYML_CHCONST(-0x80, 0x80); - m_filter_arena.str[pos++] = _RYML_CHCONST(-0x57, 0xa9); - ++i; + _c4err("parse error"); } - _c4dbgfdq("[{}]: backslash...sofar=[{}]~~~{}~~~", i, pos, m_filter_arena.first(pos)); - } - else - { - m_filter_arena.str[pos++] = curr; } } - - _RYML_CB_ASSERT(m_stack.m_callbacks, pos <= m_filter_arena.len); - if(pos < r.len || filtered_chars) - { - r = _finish_filter_arena(r, pos); - } - - _RYML_CB_ASSERT(m_stack.m_callbacks, s.len >= r.len); - _c4dbgpf(": #filteredchars={} after=~~~{}~~~", s.len - r.len, r); - - #undef _c4dbgfdq - - return r; } //----------------------------------------------------------------------------- -bool Parser::_apply_chomp(substr buf, size_t *C4_RESTRICT pos, BlockChomp_e chomp) + +template +void ParseEngine::parse_json_in_place_ev(csubstr filename, substr src) { - substr trimmed = buf.first(*pos).trimr('\n'); - bool added_newline = false; - switch(chomp) + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1); + m_file = filename; + m_buf = src; + _reset(); + m_evt_handler->start_parse(filename.str, &_s_relocate_arena, this); + m_evt_handler->begin_stream(); + while( ! 
_finished_file()) { - case CHOMP_KEEP: - if(trimmed.len == *pos) - { - _c4dbgpf("chomp=KEEP: add missing newline @{}", *pos); - //m_filter_arena.str[(*pos)++] = '\n'; - added_newline = true; - } - break; - case CHOMP_CLIP: - if(trimmed.len == *pos) - { - _c4dbgpf("chomp=CLIP: add missing newline @{}", *pos); - m_filter_arena.str[(*pos)++] = '\n'; - added_newline = true; - } - else + _scan_line(); + while( ! _finished_line()) { - _c4dbgpf("chomp=CLIP: include single trailing newline @{}", trimmed.len+1); - *pos = trimmed.len + 1; + _c4dbgnextline(); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! m_evt_handler->m_curr->line_contents.rem.empty()); + if(has_any(RSEQ)) + { + _handle_seq_json(); + } + else if(has_any(RMAP)) + { + _handle_map_json(); + } + else if(has_any(RUNK)) + { + _handle_unk_json(); + } + else + { + _c4err("internal error"); + } } - break; - case CHOMP_STRIP: - _c4dbgpf("chomp=STRIP: strip {}-{}-{} newlines", *pos, trimmed.len, *pos-trimmed.len); - *pos = trimmed.len; - break; - default: - _c4err("unknown chomp style"); + if(_finished_file()) + break; // it may have finished because of multiline blocks + _line_ended(); } - return added_newline; + _end_stream(); + m_evt_handler->finish_parse(); } //----------------------------------------------------------------------------- -csubstr Parser::_filter_block_scalar(substr s, BlockStyle_e style, BlockChomp_e chomp, size_t indentation) -{ - // a debugging scaffold: - #if 0 - #define _c4dbgfbl(fmt, ...) _c4dbgpf("filt_block" fmt, __VA_ARGS__) - #else - #define _c4dbgfbl(...) 
- #endif - - _c4dbgfbl(": indentation={} before=[{}]~~~{}~~~", indentation, s.len, s); - - if(chomp != CHOMP_KEEP && s.trim(" \n\r").len == 0u) - { - _c4dbgp("filt_block: empty scalar"); - return s.first(0); - } - - substr r = s; - switch(style) +template +void ParseEngine::parse_in_place_ev(csubstr filename, substr src) +{ + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1); + m_file = filename; + m_buf = src; + _reset(); + m_evt_handler->start_parse(filename.str, &_s_relocate_arena, this); + m_evt_handler->begin_stream(); + while( ! _finished_file()) { - case BLOCK_LITERAL: + _scan_line(); + while( ! _finished_line()) { - _c4dbgp("filt_block: style=literal"); - // trim leading whitespace up to indentation + _c4dbgnextline(); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! m_evt_handler->m_curr->line_contents.rem.empty()); + if(has_any(FLOW)) { - size_t numws = r.first_not_of(' '); - if(numws != npos) - { - if(numws > indentation) - r = r.sub(indentation); - else - r = r.sub(numws); - _c4dbgfbl(": after triml=[{}]~~~{}~~~", r.len, r); - } - else + if(has_none(RSEQIMAP)) { - if(chomp != CHOMP_KEEP || r.len == 0) + if(has_any(RSEQ)) { - _c4dbgfbl(": all spaces {}, return empty", r.len); - return r.first(0); + _handle_seq_flow(); } else { - r[0] = '\n'; - return r.first(1); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RMAP)); + _handle_map_flow(); } } - } - _grow_filter_arena(s.len + 2u); // use s.len! 
because we may need to add a newline at the end, so the leading indentation will allow space for that newline - size_t pos = 0; // the filtered size - for(size_t i = 0; i < r.len; ++i) - { - const char curr = r.str[i]; - _c4dbgfbl("[{}]='{}' pos={}", i, _c4prc(curr), pos); - if(curr == '\r') - continue; - m_filter_arena.str[pos++] = curr; - if(curr == '\n') + else { - _c4dbgfbl("[{}]: found newline", i); - // skip indentation on the next line - csubstr rem = r.sub(i+1); - size_t first = rem.first_not_of(' '); - if(first != npos) - { - _RYML_CB_ASSERT(m_stack.m_callbacks, first < rem.len); - _RYML_CB_ASSERT(m_stack.m_callbacks, i+1+first < r.len); - _c4dbgfbl("[{}]: {} spaces follow before next nonws character @ [{}]='{}'", i, first, i+1+first, rem.str[first]); - if(first < indentation) - { - _c4dbgfbl("[{}]: skip {}<{} spaces from indentation", i, first, indentation); - i += first; - } - else - { - _c4dbgfbl("[{}]: skip {} spaces from indentation", i, indentation); - i += indentation; - } - } - else - { - _RYML_CB_ASSERT(m_stack.m_callbacks, i+1 <= r.len); - first = rem.len; - _c4dbgfbl("[{}]: {} spaces to the end", i, first); - if(first) - { - if(first < indentation) - { - _c4dbgfbl("[{}]: skip everything", i); - --pos; - break; - } - else - { - _c4dbgfbl("[{}]: skip {} spaces from indentation", i, indentation); - i += indentation; - } - } - else if(i+1 == r.len) - { - if(chomp == CHOMP_STRIP) - --pos; - break; - } - } + _handle_seq_imap(); } } - _RYML_CB_ASSERT(m_stack.m_callbacks, s.len >= pos); - _c4dbgfbl(": #filteredchars={} after=~~~{}~~~", s.len - r.len, r); - bool changed = _apply_chomp(m_filter_arena, &pos, chomp); - _RYML_CB_ASSERT(m_stack.m_callbacks, pos <= m_filter_arena.len); - _RYML_CB_ASSERT(m_stack.m_callbacks, pos <= s.len); - if(pos < r.len || changed) - { - r = _finish_filter_arena(s, pos); // write into s - } - break; - } - case BLOCK_FOLD: - { - _c4dbgp("filt_block: style=fold"); - _grow_filter_arena(r.len + 2); - size_t pos = 0; // the 
filtered size - bool filtered_chars = false; - bool started = false; - bool is_indented = false; - size_t i = r.first_not_of(' '); - _c4dbgfbl(": first non space at {}", i); - if(i > indentation) - { - is_indented = true; - i = indentation; - } - _c4dbgfbl(": start folding at {}, is_indented={}", i, (int)is_indented); - auto on_change_indentation = [&](size_t numnl_following, size_t last_newl, size_t first_non_whitespace){ - _c4dbgfbl("[{}]: add 1+{} newlines", i, numnl_following); - for(size_t j = 0; j < 1 + numnl_following; ++j) - m_filter_arena.str[pos++] = '\n'; - for(i = last_newl + 1 + indentation; i < first_non_whitespace; ++i) - { - if(r.str[i] == '\r') - continue; - _c4dbgfbl("[{}]: add '{}'", i, _c4prc(r.str[i])); - m_filter_arena.str[pos++] = r.str[i]; - } - --i; - }; - for( ; i < r.len; ++i) + else if(has_any(BLCK)) { - const char curr = r.str[i]; - _c4dbgfbl("[{}]='{}'", i, _c4prc(curr)); - if(curr == '\n') + if(has_any(RSEQ)) { - filtered_chars = true; - // skip indentation on the next line, and advance over the next non-indented blank lines as well - size_t first_non_whitespace; - size_t numnl_following = (size_t)-1; - while(r[i] == '\n') - { - ++numnl_following; - csubstr rem = r.sub(i+1); - size_t first = rem.first_not_of(' '); - _c4dbgfbl("[{}]: found newline. 
first={} rem.len={}", i, first, rem.len); - if(first != npos) - { - first_non_whitespace = first + i+1; - while(first_non_whitespace < r.len && r[first_non_whitespace] == '\r') - ++first_non_whitespace; - _RYML_CB_ASSERT(m_stack.m_callbacks, first < rem.len); - _RYML_CB_ASSERT(m_stack.m_callbacks, i+1+first < r.len); - _c4dbgfbl("[{}]: {} spaces follow before next nonws character @ [{}]='{}'", i, first, i+1+first, _c4prc(rem.str[first])); - if(first < indentation) - { - _c4dbgfbl("[{}]: skip {}<{} spaces from indentation", i, first, indentation); - i += first; - } - else - { - _c4dbgfbl("[{}]: skip {} spaces from indentation", i, indentation); - i += indentation; - if(first > indentation) - { - _c4dbgfbl("[{}]: {} further indented than {}, stop newlining", i, first, indentation); - goto finished_counting_newlines; - } - } - // prepare the next while loop iteration - // by setting i at the next newline after - // an empty line - if(r[first_non_whitespace] == '\n') - i = first_non_whitespace; - else - goto finished_counting_newlines; - } - else - { - _RYML_CB_ASSERT(m_stack.m_callbacks, i+1 <= r.len); - first = rem.len; - first_non_whitespace = first + i+1; - if(first) - { - _c4dbgfbl("[{}]: {} spaces to the end", i, first); - if(first < indentation) - { - _c4dbgfbl("[{}]: skip everything", i); - i += first; - } - else - { - _c4dbgfbl("[{}]: skip {} spaces from indentation", i, indentation); - i += indentation; - if(first > indentation) - { - _c4dbgfbl("[{}]: {} spaces missing. not done yet", i, indentation - first); - goto finished_counting_newlines; - } - } - } - else // if(i+1 == r.len) - { - _c4dbgfbl("[{}]: it's the final newline", i); - _RYML_CB_ASSERT(m_stack.m_callbacks, i+1 == r.len); - _RYML_CB_ASSERT(m_stack.m_callbacks, rem.len == 0); - } - goto end_of_scalar; - } - } - end_of_scalar: - // Write all the trailing newlines. Since we're - // at the end no folding is needed, so write every - // newline (add 1). 
- _c4dbgfbl("[{}]: add {} trailing newlines", i, 1+numnl_following); - for(size_t j = 0; j < 1 + numnl_following; ++j) - m_filter_arena.str[pos++] = '\n'; - break; - finished_counting_newlines: - _c4dbgfbl("[{}]: #newlines={} firstnonws={}", i, numnl_following, first_non_whitespace); - while(first_non_whitespace < r.len && r[first_non_whitespace] == '\t') - ++first_non_whitespace; - _c4dbgfbl("[{}]: #newlines={} firstnonws={}", i, numnl_following, first_non_whitespace); - _RYML_CB_ASSERT(m_stack.m_callbacks, first_non_whitespace <= r.len); - size_t last_newl = r.last_of('\n', first_non_whitespace); - size_t this_indentation = first_non_whitespace - last_newl - 1; - _c4dbgfbl("[{}]: #newlines={} firstnonws={} lastnewl={} this_indentation={} vs indentation={}", i, numnl_following, first_non_whitespace, last_newl, this_indentation, indentation); - _RYML_CB_ASSERT(m_stack.m_callbacks, first_non_whitespace >= last_newl + 1); - _RYML_CB_ASSERT(m_stack.m_callbacks, this_indentation >= indentation); - if(!started) - { - _c4dbgfbl("[{}]: #newlines={}. 
write all leading newlines", i, numnl_following); - for(size_t j = 0; j < 1 + numnl_following; ++j) - m_filter_arena.str[pos++] = '\n'; - if(this_indentation > indentation) - { - is_indented = true; - _c4dbgfbl("[{}]: advance ->{}", i, last_newl + indentation); - i = last_newl + indentation; - } - else - { - i = first_non_whitespace - 1; - _c4dbgfbl("[{}]: advance ->{}", i, first_non_whitespace); - } - } - else if(this_indentation == indentation) - { - _c4dbgfbl("[{}]: same indentation", i); - if(!is_indented) - { - if(numnl_following == 0) - { - _c4dbgfbl("[{}]: fold!", i); - m_filter_arena.str[pos++] = ' '; - } - else - { - _c4dbgfbl("[{}]: add {} newlines", i, 1 + numnl_following); - for(size_t j = 0; j < numnl_following; ++j) - m_filter_arena.str[pos++] = '\n'; - } - i = first_non_whitespace - 1; - _c4dbgfbl("[{}]: advance {}->{}", i, i, first_non_whitespace); - } - else - { - _c4dbgfbl("[{}]: back to ref indentation", i); - is_indented = false; - on_change_indentation(numnl_following, last_newl, first_non_whitespace); - _c4dbgfbl("[{}]: advance {}->{}", i, i, first_non_whitespace); - } - } - else - { - _c4dbgfbl("[{}]: increased indentation.", i); - is_indented = true; - _RYML_CB_ASSERT(m_stack.m_callbacks, this_indentation > indentation); - on_change_indentation(numnl_following, last_newl, first_non_whitespace); - _c4dbgfbl("[{}]: advance {}->{}", i, i, first_non_whitespace); - } + _handle_seq_block(); } - else if(curr != '\r') + else { - if(curr != '\t') - started = true; - m_filter_arena.str[pos++] = curr; + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RMAP)); + _handle_map_block(); } } - _RYML_CB_ASSERT(m_stack.m_callbacks, pos <= m_filter_arena.len); - _c4dbgfbl(": #filteredchars={} after=[{}]~~~{}~~~", (int)s.len - (int)pos, pos, m_filter_arena.first(pos)); - bool changed = _apply_chomp(m_filter_arena, &pos, chomp); - if(pos < r.len || filtered_chars || changed) + else if(has_any(RUNK)) { - r = _finish_filter_arena(s, pos); // write into s 
+ _handle_unk(); + } + else if(has_any(USTY)) + { + _handle_usty(); + } + else + { + _c4err("internal error"); } } - break; - default: - _c4err("unknown block style"); - } - - _c4dbgfbl(": final=[{}]~~~{}~~~", r.len, r); - - #undef _c4dbgfbl - - return r; -} - -//----------------------------------------------------------------------------- -size_t Parser::_count_nlines(csubstr src) -{ - return 1 + src.count('\n'); -} - -//----------------------------------------------------------------------------- -void Parser::_handle_directive(csubstr directive_) -{ - csubstr directive = directive_; - if(directive.begins_with("%TAG")) - { - TagDirective td; - _c4dbgpf("%TAG directive: {}", directive_); - directive = directive.sub(4); - if(!directive.begins_with(' ')) - _c4err("malformed tag directive: {}", directive_); - directive = directive.triml(' '); - size_t pos = directive.find(' '); - if(pos == npos) - _c4err("malformed tag directive: {}", directive_); - td.handle = directive.first(pos); - directive = directive.sub(td.handle.len).triml(' '); - pos = directive.find(' '); - if(pos != npos) - directive = directive.first(pos); - td.prefix = directive; - td.next_node_id = m_tree->size(); - if(m_tree->size() > 0) - { - size_t prev = m_tree->size() - 1; - if(m_tree->is_root(prev) && m_tree->type(prev) != NOTYPE && !m_tree->is_stream(prev)) - ++td.next_node_id; - } - _c4dbgpf("%TAG: handle={} prefix={} next_node={}", td.handle, td.prefix, td.next_node_id); - m_tree->add_tag_directive(td); - } - else if(directive.begins_with("%YAML")) - { - _c4dbgpf("%YAML directive! 
ignoring...: {}", directive); + if(_finished_file()) + break; // it may have finished because of multiline blocks + _line_ended(); } + _end_stream(); + m_evt_handler->finish_parse(); } -//----------------------------------------------------------------------------- -void Parser::set_flags(flag_t f, State * s) -{ -#ifdef RYML_DBG - char buf1_[64], buf2_[64]; - csubstr buf1 = _prfl(buf1_, f); - csubstr buf2 = _prfl(buf2_, s->flags); - _c4dbgpf("state[{}]: setting flags to {}: before={}", s-m_stack.begin(), buf1, buf2); -#endif - s->flags = f; -} - -void Parser::add_flags(flag_t on, State * s) -{ -#ifdef RYML_DBG - char buf1_[64], buf2_[64], buf3_[64]; - csubstr buf1 = _prfl(buf1_, on); - csubstr buf2 = _prfl(buf2_, s->flags); - csubstr buf3 = _prfl(buf3_, s->flags|on); - _c4dbgpf("state[{}]: adding flags {}: before={} after={}", s-m_stack.begin(), buf1, buf2, buf3); -#endif - s->flags |= on; -} - -void Parser::addrem_flags(flag_t on, flag_t off, State * s) -{ -#ifdef RYML_DBG - char buf1_[64], buf2_[64], buf3_[64], buf4_[64]; - csubstr buf1 = _prfl(buf1_, on); - csubstr buf2 = _prfl(buf2_, off); - csubstr buf3 = _prfl(buf3_, s->flags); - csubstr buf4 = _prfl(buf4_, ((s->flags|on)&(~off))); - _c4dbgpf("state[{}]: adding flags {} / removing flags {}: before={} after={}", s-m_stack.begin(), buf1, buf2, buf3, buf4); -#endif - s->flags |= on; - s->flags &= ~off; -} - -void Parser::rem_flags(flag_t off, State * s) -{ -#ifdef RYML_DBG - char buf1_[64], buf2_[64], buf3_[64]; - csubstr buf1 = _prfl(buf1_, off); - csubstr buf2 = _prfl(buf2_, s->flags); - csubstr buf3 = _prfl(buf3_, s->flags&(~off)); - _c4dbgpf("state[{}]: removing flags {}: before={} after={}", s-m_stack.begin(), buf1, buf2, buf3); -#endif - s->flags &= ~off; -} - -//----------------------------------------------------------------------------- - -csubstr Parser::_prfl(substr buf, flag_t flags) -{ - size_t pos = 0; - bool gotone = false; - - #define _prflag(fl) \ - if((flags & fl) == (fl)) \ - { \ - if(gotone) 
\ - { \ - if(pos + 1 < buf.len) \ - buf[pos] = '|'; \ - ++pos; \ - } \ - csubstr fltxt = #fl; \ - if(pos + fltxt.len <= buf.len) \ - memcpy(buf.str + pos, fltxt.str, fltxt.len); \ - pos += fltxt.len; \ - gotone = true; \ - } - - _prflag(RTOP); - _prflag(RUNK); - _prflag(RMAP); - _prflag(RSEQ); - _prflag(FLOW); - _prflag(QMRK); - _prflag(RKEY); - _prflag(RVAL); - _prflag(RNXT); - _prflag(SSCL); - _prflag(QSCL); - _prflag(RSET); - _prflag(NDOC); - _prflag(RSEQIMAP); +} // namespace yml +} // namespace c4 - #undef _prflag +#undef _c4dbgnextline - RYML_ASSERT(pos <= buf.len); +#if defined(_MSC_VER) +# pragma warning(pop) +#elif defined(__clang__) +# pragma clang diagnostic pop +#elif defined(__GNUC__) +# pragma GCC diagnostic pop +#endif - return buf.first(pos); -} +#endif // _C4_YML_PARSE_ENGINE_DEF_HPP_ -//----------------------------------------------------------------------------- -//----------------------------------------------------------------------------- -//----------------------------------------------------------------------------- +// (end https://github.com/biojppm/rapidyaml/src/c4/yml/parse_engine.def.hpp) -void Parser::_grow_filter_arena(size_t num_characters_needed) -{ - _c4dbgpf("grow: arena={} numchars={}", m_filter_arena.len, num_characters_needed); - if(num_characters_needed <= m_filter_arena.len) - return; - size_t sz = m_filter_arena.len << 1; - _c4dbgpf("grow: sz={}", sz); - sz = num_characters_needed > sz ? num_characters_needed : sz; - _c4dbgpf("grow: sz={}", sz); - sz = sz < 128u ? 
128u : sz; - _c4dbgpf("grow: sz={}", sz); - _RYML_CB_ASSERT(m_stack.m_callbacks, sz >= num_characters_needed); - _resize_filter_arena(sz); -} -void Parser::_resize_filter_arena(size_t num_characters) -{ - if(num_characters > m_filter_arena.len) - { - _c4dbgpf("resize: sz={}", num_characters); - char *prev = m_filter_arena.str; - if(m_filter_arena.str) - { - _RYML_CB_ASSERT(m_stack.m_callbacks, m_filter_arena.len > 0); - _RYML_CB_FREE(m_stack.m_callbacks, m_filter_arena.str, char, m_filter_arena.len); - } - m_filter_arena.str = _RYML_CB_ALLOC_HINT(m_stack.m_callbacks, char, num_characters, prev); - m_filter_arena.len = num_characters; - } -} -substr Parser::_finish_filter_arena(substr dst, size_t pos) -{ - _RYML_CB_ASSERT(m_stack.m_callbacks, pos <= m_filter_arena.len); - _RYML_CB_ASSERT(m_stack.m_callbacks, pos <= dst.len); - memcpy(dst.str, m_filter_arena.str, pos); - return dst.first(pos); -} +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/yml/reference_resolver.cpp +// https://github.com/biojppm/rapidyaml/src/c4/yml/reference_resolver.cpp +//-------------------------------------------------------------------------------- +//******************************************************************************** +#ifdef RYML_SINGLE_HDR_DEFINE_NOW +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/reference_resolver.hpp +//#include "c4/yml/reference_resolver.hpp" +#if !defined(C4_YML_REFERENCE_RESOLVER_HPP_) && !defined(_C4_YML_REFERENCE_RESOLVER_HPP_) +#error "amalgamate: file c4/yml/reference_resolver.hpp must have been included at this point" +#endif /* C4_YML_REFERENCE_RESOLVER_HPP_ */ -//----------------------------------------------------------------------------- -//----------------------------------------------------------------------------- 
-//----------------------------------------------------------------------------- +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/dump.hpp +//#include "c4/dump.hpp" // this is needed to resolve a function in the next header +#if !defined(C4_DUMP_HPP_) && !defined(_C4_DUMP_HPP_) +#error "amalgamate: file c4/dump.hpp must have been included at this point" +#endif /* C4_DUMP_HPP_ */ -csubstr Parser::location_contents(Location const& loc) const -{ - _RYML_CB_ASSERT(m_stack.m_callbacks, loc.offset < m_buf.len); - return m_buf.sub(loc.offset); -} +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/common.hpp +//#include "c4/yml/common.hpp" +#if !defined(C4_YML_COMMON_HPP_) && !defined(_C4_YML_COMMON_HPP_) +#error "amalgamate: file c4/yml/common.hpp must have been included at this point" +#endif /* C4_YML_COMMON_HPP_ */ -Location Parser::location(ConstNodeRef node) const -{ - _RYML_CB_ASSERT(m_stack.m_callbacks, node.valid()); - return location(*node.tree(), node.id()); -} +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/detail/parser_dbg.hpp +//#include "c4/yml/detail/parser_dbg.hpp" +#if !defined(C4_YML_DETAIL_PARSER_DBG_HPP_) && !defined(_C4_YML_DETAIL_PARSER_DBG_HPP_) +#error "amalgamate: file c4/yml/detail/parser_dbg.hpp must have been included at this point" +#endif /* C4_YML_DETAIL_PARSER_DBG_HPP_ */ -Location Parser::location(Tree const& tree, size_t node) const -{ - // try hard to avoid getting the location from a null string. 
- Location loc; - if(_location_from_node(tree, node, &loc, 0)) - return loc; - return val_location(m_buf.str); -} +#ifdef RYML_DBG +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/detail/print.hpp +//#include "c4/yml/detail/print.hpp" +#if !defined(C4_YML_DETAIL_PRINT_HPP_) && !defined(_C4_YML_DETAIL_PRINT_HPP_) +#error "amalgamate: file c4/yml/detail/print.hpp must have been included at this point" +#endif /* C4_YML_DETAIL_PRINT_HPP_ */ -bool Parser::_location_from_node(Tree const& tree, size_t node, Location *C4_RESTRICT loc, size_t level) const -{ - if(tree.has_key(node)) - { - csubstr k = tree.key(node); - if(C4_LIKELY(k.str != nullptr)) - { - _RYML_CB_ASSERT(m_stack.m_callbacks, k.is_sub(m_buf)); - _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf.is_super(k)); - *loc = val_location(k.str); - return true; - } - } +#else +#define _c4dbg_tree(...) +#define _c4dbg_node(...) +#endif - if(tree.has_val(node)) - { - csubstr v = tree.val(node); - if(C4_LIKELY(v.str != nullptr)) - { - _RYML_CB_ASSERT(m_stack.m_callbacks, v.is_sub(m_buf)); - _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf.is_super(v)); - *loc = val_location(v.str); - return true; - } - } +namespace c4 { +namespace yml { - if(tree.is_container(node)) - { - if(_location_from_cont(tree, node, loc)) - return true; - } +id_type ReferenceResolver::count_anchors_and_refs_(id_type n) +{ + id_type c = 0; + c += m_tree->has_key_anchor(n); + c += m_tree->has_val_anchor(n); + c += m_tree->is_key_ref(n); + c += m_tree->is_val_ref(n); + c += m_tree->has_key(n) && m_tree->key(n) == "<<"; + for(id_type ch = m_tree->first_child(n); ch != NONE; ch = m_tree->next_sibling(ch)) + c += count_anchors_and_refs_(ch); + return c; +} - if(tree.type(node) != NOTYPE && level == 0) +void ReferenceResolver::gather_anchors_and_refs__(id_type n) +{ + // insert key refs BEFORE inserting val refs + if(m_tree->has_key(n)) { - // try the prev sibling + if(m_tree->key(n) == "<<") { - const size_t prev = 
tree.prev_sibling(node); - if(prev != NONE) + _c4dbgpf("node[{}]: key is <<", n); + if(m_tree->has_val(n)) { - if(_location_from_node(tree, prev, loc, level+1)) - return true; + if(m_tree->is_val_ref(n)) + { + _c4dbgpf("node[{}]: val ref, inheriting!", n); + m_refs.push({VALREF, n, NONE, NONE, NONE, NONE}); + //m_refs.push({KEYREF, n, NONE, NONE, NONE, NONE}); + } + else + { + _c4dbgpf("node[{}]: not ref!", n); + } } - } - // try the next sibling - { - const size_t next = tree.next_sibling(node); - if(next != NONE) + else if(m_tree->is_seq(n)) { - if(_location_from_node(tree, next, loc, level+1)) - return true; + // for merging multiple inheritance targets + // <<: [ *CENTER, *BIG ] + _c4dbgpf("node[{}]: is seq!", n); + for(id_type ich = m_tree->first_child(n); ich != NONE; ich = m_tree->next_sibling(ich)) + { + _c4dbgpf("node[{}]: val ref, inheriting multiple: {}", n, ich); + if(m_tree->is_container(ich)) + { + detail::_report_err(m_tree->m_callbacks, "ERROR: node {} child {}: refs for << cannot be containers.'", n, ich); + C4_UNREACHABLE_AFTER_ERR(); + } + m_refs.push({VALREF, ich, NONE, NONE, n, m_tree->next_sibling(n)}); + } + return; // don't descend into the seq } - } - // try the parent - { - const size_t parent = tree.parent(node); - if(parent != NONE) + else { - if(_location_from_node(tree, parent, loc, level+1)) - return true; + detail::_report_err(m_tree->m_callbacks, "ERROR: node {}: refs for << must be either val or seq", n); + C4_UNREACHABLE_AFTER_ERR(); } } + else if(m_tree->is_key_ref(n)) + { + _c4dbgpf("node[{}]: key ref: '{}'", n, m_tree->key_ref(n)); + _RYML_CB_ASSERT(m_tree->m_callbacks, m_tree->key(n) != "<<"); + _RYML_CB_CHECK(m_tree->m_callbacks, (!m_tree->has_key(n)) || m_tree->key(n).ends_with(m_tree->key_ref(n))); + m_refs.push({KEYREF, n, NONE, NONE, NONE, NONE}); + } + } + // val ref + if(m_tree->is_val_ref(n) && (!m_tree->has_key(n) || m_tree->key(n) != "<<")) + { + _c4dbgpf("node[{}]: val ref: '{}'", n, m_tree->val_ref(n)); + 
RYML_CHECK((!m_tree->has_val(n)) || m_tree->val(n).ends_with(m_tree->val_ref(n))); + m_refs.push({VALREF, n, NONE, NONE, NONE, NONE}); + } + // anchors + if(m_tree->has_key_anchor(n)) + { + _c4dbgpf("node[{}]: key anchor: '{}'", n, m_tree->key_anchor(n)); + RYML_CHECK(m_tree->has_key(n)); + m_refs.push({KEYANCH, n, NONE, NONE, NONE, NONE}); + } + if(m_tree->has_val_anchor(n)) + { + _c4dbgpf("node[{}]: val anchor: '{}'", n, m_tree->val_anchor(n)); + RYML_CHECK(m_tree->has_val(n) || m_tree->is_container(n)); + m_refs.push({VALANCH, n, NONE, NONE, NONE, NONE}); } + // recurse + for(id_type ch = m_tree->first_child(n); ch != NONE; ch = m_tree->next_sibling(ch)) + gather_anchors_and_refs__(ch); +} - return false; +void ReferenceResolver::gather_anchors_and_refs_() +{ + _c4dbgp("gathering anchors and refs..."); + + // minimize (re-)allocations by counting first + id_type num_anchors_and_refs = count_anchors_and_refs_(m_tree->root_id()); + if(!num_anchors_and_refs) + return; + m_refs.reserve(num_anchors_and_refs); + m_refs.clear(); + + // now descend through the hierarchy + gather_anchors_and_refs__(m_tree->root_id()); + + _c4dbgpf("found {} anchors/refs", m_refs.size()); + + // finally connect the reference list + id_type prev_anchor = NONE; + id_type count = 0; + for(auto &rd : m_refs) + { + rd.prev_anchor = prev_anchor; + if(rd.type.has_anchor()) + prev_anchor = count; + ++count; + } + _c4dbgp("gathering anchors and refs: finished"); } -bool Parser::_location_from_cont(Tree const& tree, size_t node, Location *C4_RESTRICT loc) const +id_type ReferenceResolver::lookup_(RefData *C4_RESTRICT ra) { - _RYML_CB_ASSERT(m_stack.m_callbacks, tree.is_container(node)); - if(!tree.is_stream(node)) + RYML_ASSERT(ra->type.is_key_ref() || ra->type.is_val_ref()); + RYML_ASSERT(ra->type.is_key_ref() != ra->type.is_val_ref()); + csubstr refname; + if(ra->type.is_val_ref()) { - const char *node_start = tree._p(node)->m_val.scalar.str; // this was stored in the container - 
if(tree.has_children(node)) - { - size_t child = tree.first_child(node); - if(tree.has_key(child)) - { - // when a map starts, the container was set after the key - csubstr k = tree.key(child); - if(k.str && node_start > k.str) - node_start = k.str; - } - } - *loc = val_location(node_start); - return true; + refname = m_tree->val_ref(ra->node); } - else // it's a stream + else { - *loc = val_location(m_buf.str); // just return the front of the buffer + RYML_ASSERT(ra->type.is_key_ref()); + refname = m_tree->key_ref(ra->node); } - return true; + while(ra->prev_anchor != NONE) + { + ra = &m_refs[ra->prev_anchor]; + if(m_tree->has_anchor(ra->node, refname)) + return ra->node; + } + detail::_report_err(m_tree->m_callbacks, "ERROR: anchor not found: '{}'", refname); + C4_UNREACHABLE_AFTER_ERR(); } +void ReferenceResolver::reset_(Tree *t_) +{ + if(t_->callbacks() != m_refs.m_callbacks) + { + m_refs.m_callbacks = t_->callbacks(); + } + m_refs.clear(); + m_tree = t_; +} -Location Parser::val_location(const char *val) const +void ReferenceResolver::resolve(Tree *t_) { - if(C4_UNLIKELY(val == nullptr)) - return {m_file, 0, 0, 0}; + _c4dbgp("resolving references..."); - _RYML_CB_CHECK(m_stack.m_callbacks, m_options.locations()); - // NOTE: if any of these checks fails, the parser needs to be - // instantiated with locations enabled. - _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf.str == m_newline_offsets_buf.str); - _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf.len == m_newline_offsets_buf.len); - _RYML_CB_ASSERT(m_stack.m_callbacks, m_options.locations()); - _RYML_CB_ASSERT(m_stack.m_callbacks, !_locations_dirty()); - _RYML_CB_ASSERT(m_stack.m_callbacks, m_newline_offsets != nullptr); - _RYML_CB_ASSERT(m_stack.m_callbacks, m_newline_offsets_size > 0); - // NOTE: the pointer needs to belong to the buffer that was used to parse. 
- csubstr src = m_buf; - _RYML_CB_CHECK(m_stack.m_callbacks, val != nullptr || src.str == nullptr); - _RYML_CB_CHECK(m_stack.m_callbacks, (val >= src.begin() && val <= src.end()) || (src.str == nullptr && val == nullptr)); - // ok. search the first stored newline after the given ptr - using lineptr_type = size_t const* C4_RESTRICT; - lineptr_type lineptr = nullptr; - size_t offset = (size_t)(val - src.begin()); - if(m_newline_offsets_size < 30) // TODO magic number + reset_(t_); + + _c4dbg_tree("unresolved tree", *m_tree); + + gather_anchors_and_refs_(); + if(m_refs.empty()) + return; + + /* from the specs: "an alias node refers to the most recent + * node in the serialization having the specified anchor". So + * we need to start looking upward from ref nodes. + * + * @see http://yaml.org/spec/1.2/spec.html#id2765878 */ + _c4dbgp("matching anchors/refs..."); + for(id_type i = 0, e = m_refs.size(); i < e; ++i) { - // just do a linear search if the size is small. - for(lineptr_type curr = m_newline_offsets, last = m_newline_offsets + m_newline_offsets_size; curr < last; ++curr) + RefData &C4_RESTRICT refdata = m_refs.top(i); + if( ! refdata.type.is_ref()) + continue; + refdata.target = lookup_(&refdata); + } + _c4dbgp("matching anchors/refs: finished"); + + // insert the resolved references + _c4dbgp("modifying tree..."); + id_type prev_parent_ref = NONE; + id_type prev_parent_ref_after = NONE; + for(id_type i = 0, e = m_refs.size(); i < e; ++i) + { + RefData const& C4_RESTRICT refdata = m_refs[i]; + _c4dbgpf("instance {}/{}...", i, e); + if( ! refdata.type.is_ref()) + continue; + _c4dbgpf("instance {} is reference!", i); + if(refdata.parent_ref != NONE) + { + _c4dbgpf("ref {} has parent: {}", i, refdata.parent_ref); + _RYML_CB_ASSERT(m_tree->m_callbacks, m_tree->is_seq(refdata.parent_ref)); + const id_type p = m_tree->parent(refdata.parent_ref); + const id_type after = (prev_parent_ref != refdata.parent_ref) ? 
+ refdata.parent_ref//prev_sibling(rd.parent_ref_sibling) + : + prev_parent_ref_after; + prev_parent_ref = refdata.parent_ref; + prev_parent_ref_after = m_tree->duplicate_children_no_rep(refdata.target, p, after); + m_tree->remove(refdata.node); + } + else { - if(*curr > offset) + _c4dbgpf("ref {} has no parent", i, refdata.parent_ref); + if(m_tree->has_key(refdata.node) && m_tree->key(refdata.node) == "<<") { - lineptr = curr; - break; + _c4dbgpf("ref {} is inheriting", i); + _RYML_CB_ASSERT(m_tree->m_callbacks, m_tree->is_keyval(refdata.node)); + const id_type p = m_tree->parent(refdata.node); + const id_type after = m_tree->prev_sibling(refdata.node); + m_tree->duplicate_children_no_rep(refdata.target, p, after); + m_tree->remove(refdata.node); } - } - } - else - { - // do a bisection search if the size is not small. - // - // We could use std::lower_bound but this is simple enough and - // spares the include of . - size_t count = m_newline_offsets_size; - size_t step; - lineptr_type it; - lineptr = m_newline_offsets; - while(count) - { - step = count >> 1; - it = lineptr + step; - if(*it < offset) + else if(refdata.type.is_key_ref()) { - lineptr = ++it; - count -= step + 1; + _c4dbgpf("ref {} is key ref", i); + _RYML_CB_ASSERT(m_tree->m_callbacks, m_tree->is_key_ref(refdata.node)); + _RYML_CB_ASSERT(m_tree->m_callbacks, m_tree->has_key_anchor(refdata.target) || m_tree->has_val_anchor(refdata.target)); + if(m_tree->has_val_anchor(refdata.target) && m_tree->val_anchor(refdata.target) == m_tree->key_ref(refdata.node)) + { + _RYML_CB_CHECK(m_tree->m_callbacks, !m_tree->is_container(refdata.target)); + _RYML_CB_CHECK(m_tree->m_callbacks, m_tree->has_val(refdata.target)); + const type_bits existing_style_flags = VAL_STYLE & m_tree->_p(refdata.target)->m_type.type; + static_assert((VAL_STYLE >> 1u) == (KEY_STYLE), "bad flags"); + m_tree->_p(refdata.node)->m_key.scalar = m_tree->val(refdata.target); + m_tree->_add_flags(refdata.node, KEY | (existing_style_flags >> 
1u)); + } + else + { + _RYML_CB_CHECK(m_tree->m_callbacks, m_tree->key_anchor(refdata.target) == m_tree->key_ref(refdata.node)); + m_tree->_p(refdata.node)->m_key.scalar = m_tree->key(refdata.target); + // keys cannot be containers, so don't inherit container flags + const type_bits existing_style_flags = KEY_STYLE & m_tree->_p(refdata.target)->m_type.type; + m_tree->_add_flags(refdata.node, KEY | existing_style_flags); + } } - else + else // val ref { - count = step; + _c4dbgpf("ref {} is val ref", i); + _RYML_CB_ASSERT(m_tree->m_callbacks, refdata.type.is_val_ref()); + if(m_tree->has_key_anchor(refdata.target) && m_tree->key_anchor(refdata.target) == m_tree->val_ref(refdata.node)) + { + _RYML_CB_CHECK(m_tree->m_callbacks, !m_tree->is_container(refdata.target)); + _RYML_CB_CHECK(m_tree->m_callbacks, m_tree->has_val(refdata.target)); + // keys cannot be containers, so don't inherit container flags + const type_bits existing_style_flags = (KEY_STYLE) & m_tree->_p(refdata.target)->m_type.type; + static_assert((KEY_STYLE << 1u) == (VAL_STYLE), "bad flags"); + m_tree->_p(refdata.node)->m_val.scalar = m_tree->key(refdata.target); + m_tree->_add_flags(refdata.node, VAL | (existing_style_flags << 1u)); + } + else + { + m_tree->duplicate_contents(refdata.target, refdata.node); + } } } - } - _RYML_CB_ASSERT(m_stack.m_callbacks, lineptr >= m_newline_offsets); - _RYML_CB_ASSERT(m_stack.m_callbacks, lineptr <= m_newline_offsets + m_newline_offsets_size); - _RYML_CB_ASSERT(m_stack.m_callbacks, *lineptr > offset); - Location loc; - loc.name = m_file; - loc.offset = offset; - loc.line = (size_t)(lineptr - m_newline_offsets); - if(lineptr > m_newline_offsets) - loc.col = (offset - *(lineptr-1) - 1u); - else - loc.col = offset; - return loc; -} - -void Parser::_prepare_locations() -{ - m_newline_offsets_buf = m_buf; - size_t numnewlines = 1u + m_buf.count('\n'); - _resize_locations(numnewlines); - m_newline_offsets_size = 0; - for(size_t i = 0; i < m_buf.len; i++) - if(m_buf[i] == 
'\n') - m_newline_offsets[m_newline_offsets_size++] = i; - m_newline_offsets[m_newline_offsets_size++] = m_buf.len; - _RYML_CB_ASSERT(m_stack.m_callbacks, m_newline_offsets_size == numnewlines); -} + } + _c4dbgp("modifying tree: finished"); -void Parser::_resize_locations(size_t numnewlines) -{ - if(numnewlines > m_newline_offsets_capacity) + // clear anchors and refs + _c4dbgp("clearing anchors/refs"); + for(auto const& C4_RESTRICT ar : m_refs) { - if(m_newline_offsets) - _RYML_CB_FREE(m_stack.m_callbacks, m_newline_offsets, size_t, m_newline_offsets_capacity); - m_newline_offsets = _RYML_CB_ALLOC_HINT(m_stack.m_callbacks, size_t, numnewlines, m_newline_offsets); - m_newline_offsets_capacity = numnewlines; + m_tree->rem_anchor_ref(ar.node); + if(ar.parent_ref != NONE) + if(m_tree->type(ar.parent_ref) != NOTYPE) + m_tree->remove(ar.parent_ref); } -} + _c4dbgp("clearing anchors/refs: finished"); -bool Parser::_locations_dirty() const -{ - return !m_newline_offsets_size; -} + _c4dbg_tree("resolved tree", *m_tree); -} // namespace yml -} // namespace c4 + m_tree = nullptr; + _c4dbgp("resolving references: finished"); +} -#if defined(_MSC_VER) -# pragma warning(pop) -#elif defined(__clang__) -# pragma clang diagnostic pop -#elif defined(__GNUC__) -# pragma GCC diagnostic pop -#endif +} // namespace ryml +} // namespace c4 #endif /* RYML_SINGLE_HDR_DEFINE_NOW */ -// (end https://github.com/biojppm/rapidyaml/src/c4/yml/parse.cpp) +// (end https://github.com/biojppm/rapidyaml/src/c4/yml/reference_resolver.cpp) //******************************************************************************** //-------------------------------------------------------------------------------- -// src/c4/yml/node.cpp -// https://github.com/biojppm/rapidyaml/src/c4/yml/node.cpp +// src/c4/yml/parse.cpp +// https://github.com/biojppm/rapidyaml/src/c4/yml/parse.cpp //-------------------------------------------------------------------------------- 
//******************************************************************************** #ifdef RYML_SINGLE_HDR_DEFINE_NOW // amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/parse.hpp +//#include "c4/yml/parse.hpp" +#if !defined(C4_YML_PARSE_HPP_) && !defined(_C4_YML_PARSE_HPP_) +#error "amalgamate: file c4/yml/parse.hpp must have been included at this point" +#endif /* C4_YML_PARSE_HPP_ */ + + +#ifndef _C4_YML_NODE_HPP_ +// amalgamate: removed include of // https://github.com/biojppm/rapidyaml/src/c4/yml/node.hpp //#include "c4/yml/node.hpp" #if !defined(C4_YML_NODE_HPP_) && !defined(_C4_YML_NODE_HPP_) #error "amalgamate: file c4/yml/node.hpp must have been included at this point" #endif /* C4_YML_NODE_HPP_ */ +#endif +#ifndef _C4_YML_PARSE_ENGINE_HPP_ +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/parse_engine.hpp +//#include "c4/yml/parse_engine.hpp" +#if !defined(C4_YML_PARSE_ENGINE_HPP_) && !defined(_C4_YML_PARSE_ENGINE_HPP_) +#error "amalgamate: file c4/yml/parse_engine.hpp must have been included at this point" +#endif /* C4_YML_PARSE_ENGINE_HPP_ */ -namespace c4 { -namespace yml { +#endif +#ifndef _C4_YML_PARSE_ENGINE_DEF_HPP_ +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/parse_engine.def.hpp +//#include "c4/yml/parse_engine.def.hpp" +#if !defined(C4_YML_PARSE_ENGINE_DEF_HPP_) && !defined(_C4_YML_PARSE_ENGINE_DEF_HPP_) +#error "amalgamate: file c4/yml/parse_engine.def.hpp must have been included at this point" +#endif /* C4_YML_PARSE_ENGINE_DEF_HPP_ */ +#endif +#ifndef _C4_YML_EVENT_HANDLER_TREE_HPP_ +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/event_handler_tree.hpp +//#include "c4/yml/event_handler_tree.hpp" +#if !defined(C4_YML_EVENT_HANDLER_TREE_HPP_) && !defined(_C4_YML_EVENT_HANDLER_TREE_HPP_) +#error "amalgamate: file c4/yml/event_handler_tree.hpp must have been included at this point" +#endif /* 
C4_YML_EVENT_HANDLER_TREE_HPP_ */ +#endif -//----------------------------------------------------------------------------- -//----------------------------------------------------------------------------- //----------------------------------------------------------------------------- -size_t NodeRef::set_key_serialized(c4::fmt::const_base64_wrapper w) +namespace c4 { +namespace yml { + +// instantiate the parser class +template class ParseEngine; + +namespace { +inline void _reset_tree_handler(Parser *parser, Tree *t, id_type node_id) { - _apply_seed(); - csubstr encoded = this->to_arena(w); - this->set_key(encoded); - return encoded.len; + RYML_ASSERT(parser); + RYML_ASSERT(t); + if(!parser->m_evt_handler) + _RYML_CB_ERR(t->m_callbacks, "event handler is not set"); + parser->m_evt_handler->reset(t, node_id); + RYML_ASSERT(parser->m_evt_handler->m_tree == t); } +} // namespace -size_t NodeRef::set_val_serialized(c4::fmt::const_base64_wrapper w) +void parse_in_place(Parser *parser, csubstr filename, substr yaml, Tree *t, id_type node_id) +{ + _reset_tree_handler(parser, t, node_id); + parser->parse_in_place_ev(filename, yaml); +} + +void parse_json_in_place(Parser *parser, csubstr filename, substr json, Tree *t, id_type node_id) +{ + _reset_tree_handler(parser, t, node_id); + parser->parse_json_in_place_ev(filename, json); +} + + +// this is vertically aligned to highlight the parameter differences. 
+void parse_in_place(Parser *parser, substr yaml, Tree *t, id_type node_id) { parse_in_place(parser, {}, yaml, t, node_id); } +void parse_in_place(Parser *parser, csubstr filename, substr yaml, Tree *t ) { RYML_CHECK(t); parse_in_place(parser, filename, yaml, t, t->root_id()); } +void parse_in_place(Parser *parser, substr yaml, Tree *t ) { RYML_CHECK(t); parse_in_place(parser, {} , yaml, t, t->root_id()); } +void parse_in_place(Parser *parser, csubstr filename, substr yaml, NodeRef node ) { RYML_CHECK(!node.invalid()); parse_in_place(parser, filename, yaml, node.tree(), node.id()); } +void parse_in_place(Parser *parser, substr yaml, NodeRef node ) { RYML_CHECK(!node.invalid()); parse_in_place(parser, {} , yaml, node.tree(), node.id()); } +Tree parse_in_place(Parser *parser, csubstr filename, substr yaml ) { RYML_CHECK(parser); RYML_CHECK(parser->m_evt_handler); Tree tree(parser->callbacks()); parse_in_place(parser, filename, yaml, &tree, tree.root_id()); return tree; } +Tree parse_in_place(Parser *parser, substr yaml ) { RYML_CHECK(parser); RYML_CHECK(parser->m_evt_handler); Tree tree(parser->callbacks()); parse_in_place(parser, {} , yaml, &tree, tree.root_id()); return tree; } + +// this is vertically aligned to highlight the parameter differences. 
+void parse_in_place(csubstr filename, substr yaml, Tree *t, id_type node_id) { RYML_CHECK(t); Parser::handler_type event_handler(t->callbacks()); Parser parser(&event_handler); parse_in_place(&parser, filename, yaml, t, node_id); } +void parse_in_place( substr yaml, Tree *t, id_type node_id) { RYML_CHECK(t); Parser::handler_type event_handler(t->callbacks()); Parser parser(&event_handler); parse_in_place(&parser, {} , yaml, t, node_id); } +void parse_in_place(csubstr filename, substr yaml, Tree *t ) { RYML_CHECK(t); Parser::handler_type event_handler(t->callbacks()); Parser parser(&event_handler); parse_in_place(&parser, filename, yaml, t, t->root_id()); } +void parse_in_place( substr yaml, Tree *t ) { RYML_CHECK(t); Parser::handler_type event_handler(t->callbacks()); Parser parser(&event_handler); parse_in_place(&parser, {} , yaml, t, t->root_id()); } +void parse_in_place(csubstr filename, substr yaml, NodeRef node ) { RYML_CHECK(!node.invalid()); Parser::handler_type event_handler(node.tree()->callbacks()); Parser parser(&event_handler); parse_in_place(&parser, filename, yaml, node.tree(), node.id()); } +void parse_in_place( substr yaml, NodeRef node ) { RYML_CHECK(!node.invalid()); Parser::handler_type event_handler(node.tree()->callbacks()); Parser parser(&event_handler); parse_in_place(&parser, {} , yaml, node.tree(), node.id()); } +Tree parse_in_place(csubstr filename, substr yaml ) { Parser::handler_type event_handler; Parser parser(&event_handler); Tree tree(parser.callbacks()); parse_in_place(&parser, filename, yaml, &tree, tree.root_id()); return tree; } +Tree parse_in_place( substr yaml ) { Parser::handler_type event_handler; Parser parser(&event_handler); Tree tree(parser.callbacks()); parse_in_place(&parser, {} , yaml, &tree, tree.root_id()); return tree; } + + +// this is vertically aligned to highlight the parameter differences. 
+void parse_json_in_place(Parser *parser, substr json, Tree *t, id_type node_id) { parse_json_in_place(parser, {}, json, t, node_id); } +void parse_json_in_place(Parser *parser, csubstr filename, substr json, Tree *t ) { RYML_CHECK(t); parse_json_in_place(parser, filename, json, t, t->root_id()); } +void parse_json_in_place(Parser *parser, substr json, Tree *t ) { RYML_CHECK(t); parse_json_in_place(parser, {} , json, t, t->root_id()); } +void parse_json_in_place(Parser *parser, csubstr filename, substr json, NodeRef node ) { RYML_CHECK(!node.invalid()); parse_json_in_place(parser, filename, json, node.tree(), node.id()); } +void parse_json_in_place(Parser *parser, substr json, NodeRef node ) { RYML_CHECK(!node.invalid()); parse_json_in_place(parser, {} , json, node.tree(), node.id()); } +Tree parse_json_in_place(Parser *parser, csubstr filename, substr json ) { RYML_CHECK(parser); RYML_CHECK(parser->m_evt_handler); Tree tree(parser->callbacks()); parse_json_in_place(parser, filename, json, &tree, tree.root_id()); return tree; } +Tree parse_json_in_place(Parser *parser, substr json ) { RYML_CHECK(parser); RYML_CHECK(parser->m_evt_handler); Tree tree(parser->callbacks()); parse_json_in_place(parser, {} , json, &tree, tree.root_id()); return tree; } + +// this is vertically aligned to highlight the parameter differences. 
+void parse_json_in_place(csubstr filename, substr json, Tree *t, id_type node_id) { RYML_CHECK(t); Parser::handler_type event_handler(t->callbacks()); Parser parser(&event_handler); parse_json_in_place(&parser, filename, json, t, node_id); } +void parse_json_in_place( substr json, Tree *t, id_type node_id) { RYML_CHECK(t); Parser::handler_type event_handler(t->callbacks()); Parser parser(&event_handler); parse_json_in_place(&parser, {} , json, t, node_id); } +void parse_json_in_place(csubstr filename, substr json, Tree *t ) { RYML_CHECK(t); Parser::handler_type event_handler(t->callbacks()); Parser parser(&event_handler); parse_json_in_place(&parser, filename, json, t, t->root_id()); } +void parse_json_in_place( substr json, Tree *t ) { RYML_CHECK(t); Parser::handler_type event_handler(t->callbacks()); Parser parser(&event_handler); parse_json_in_place(&parser, {} , json, t, t->root_id()); } +void parse_json_in_place(csubstr filename, substr json, NodeRef node ) { RYML_CHECK(!node.invalid()); Parser::handler_type event_handler(node.tree()->callbacks()); Parser parser(&event_handler); parse_json_in_place(&parser, filename, json, node.tree(), node.id()); } +void parse_json_in_place( substr json, NodeRef node ) { RYML_CHECK(!node.invalid()); Parser::handler_type event_handler(node.tree()->callbacks()); Parser parser(&event_handler); parse_json_in_place(&parser, {} , json, node.tree(), node.id()); } +Tree parse_json_in_place(csubstr filename, substr json ) { Parser::handler_type event_handler; Parser parser(&event_handler); Tree tree(parser.callbacks()); parse_json_in_place(&parser, filename, json, &tree, tree.root_id()); return tree; } +Tree parse_json_in_place( substr json ) { Parser::handler_type event_handler; Parser parser(&event_handler); Tree tree(parser.callbacks()); parse_json_in_place(&parser, {} , json, &tree, tree.root_id()); return tree; } + + +// this is vertically aligned to highlight the parameter differences. 
+void parse_in_arena(Parser *parser, csubstr filename, csubstr yaml, Tree *t, id_type node_id) { RYML_CHECK(t); substr src = t->copy_to_arena(yaml); parse_in_place(parser, filename, src, t, node_id); } +void parse_in_arena(Parser *parser, csubstr yaml, Tree *t, id_type node_id) { RYML_CHECK(t); substr src = t->copy_to_arena(yaml); parse_in_place(parser, {} , src, t, node_id); } +void parse_in_arena(Parser *parser, csubstr filename, csubstr yaml, Tree *t ) { RYML_CHECK(t); substr src = t->copy_to_arena(yaml); parse_in_place(parser, filename, src, t, t->root_id()); } +void parse_in_arena(Parser *parser, csubstr yaml, Tree *t ) { RYML_CHECK(t); substr src = t->copy_to_arena(yaml); parse_in_place(parser, {} , src, t, t->root_id()); } +void parse_in_arena(Parser *parser, csubstr filename, csubstr yaml, NodeRef node ) { RYML_CHECK(!node.invalid()); substr src = node.tree()->copy_to_arena(yaml); parse_in_place(parser, filename, src, node.tree(), node.id()); } +void parse_in_arena(Parser *parser, csubstr yaml, NodeRef node ) { RYML_CHECK(!node.invalid()); substr src = node.tree()->copy_to_arena(yaml); parse_in_place(parser, {} , src, node.tree(), node.id()); } +Tree parse_in_arena(Parser *parser, csubstr filename, csubstr yaml ) { RYML_CHECK(parser); RYML_CHECK(parser->m_evt_handler); Tree tree(parser->callbacks()); substr src = tree.copy_to_arena(yaml); parse_in_place(parser, filename, src, &tree, tree.root_id()); return tree; } +Tree parse_in_arena(Parser *parser, csubstr yaml ) { RYML_CHECK(parser); RYML_CHECK(parser->m_evt_handler); Tree tree(parser->callbacks()); substr src = tree.copy_to_arena(yaml); parse_in_place(parser, {} , src, &tree, tree.root_id()); return tree; } + +// this is vertically aligned to highlight the parameter differences. 
+void parse_in_arena(csubstr filename, csubstr yaml, Tree *t, id_type node_id) { RYML_CHECK(t); Parser::handler_type event_handler(t->callbacks()); Parser parser(&event_handler); substr src = t->copy_to_arena(yaml); parse_in_place(&parser, filename, src, t, node_id); } +void parse_in_arena( csubstr yaml, Tree *t, id_type node_id) { RYML_CHECK(t); Parser::handler_type event_handler(t->callbacks()); Parser parser(&event_handler); substr src = t->copy_to_arena(yaml); parse_in_place(&parser, {} , src, t, node_id); } +void parse_in_arena(csubstr filename, csubstr yaml, Tree *t ) { RYML_CHECK(t); Parser::handler_type event_handler(t->callbacks()); Parser parser(&event_handler); substr src = t->copy_to_arena(yaml); parse_in_place(&parser, filename, src, t, t->root_id()); } +void parse_in_arena( csubstr yaml, Tree *t ) { RYML_CHECK(t); Parser::handler_type event_handler(t->callbacks()); Parser parser(&event_handler); substr src = t->copy_to_arena(yaml); parse_in_place(&parser, {} , src, t, t->root_id()); } +void parse_in_arena(csubstr filename, csubstr yaml, NodeRef node ) { RYML_CHECK(!node.invalid()); Parser::handler_type event_handler(node.tree()->callbacks()); Parser parser(&event_handler); substr src = node.tree()->copy_to_arena(yaml); parse_in_place(&parser, filename, src, node.tree(), node.id()); } +void parse_in_arena( csubstr yaml, NodeRef node ) { RYML_CHECK(!node.invalid()); Parser::handler_type event_handler(node.tree()->callbacks()); Parser parser(&event_handler); substr src = node.tree()->copy_to_arena(yaml); parse_in_place(&parser, {} , src, node.tree(), node.id()); } +Tree parse_in_arena(csubstr filename, csubstr yaml ) { Parser::handler_type event_handler; Parser parser(&event_handler); Tree tree(parser.callbacks()); substr src = tree.copy_to_arena(yaml); parse_in_place(&parser, filename, src, &tree, tree.root_id()); return tree; } +Tree parse_in_arena( csubstr yaml ) { Parser::handler_type event_handler; Parser parser(&event_handler); Tree 
tree(parser.callbacks()); substr src = tree.copy_to_arena(yaml); parse_in_place(&parser, {} , src, &tree, tree.root_id()); return tree; } + + +// this is vertically aligned to highlight the parameter differences. +void parse_json_in_arena(Parser *parser, csubstr filename, csubstr json, Tree *t, id_type node_id) { RYML_CHECK(t); substr src = t->copy_to_arena(json); parse_json_in_place(parser, filename, src, t, node_id); } +void parse_json_in_arena(Parser *parser, csubstr json, Tree *t, id_type node_id) { RYML_CHECK(t); substr src = t->copy_to_arena(json); parse_json_in_place(parser, {} , src, t, node_id); } +void parse_json_in_arena(Parser *parser, csubstr filename, csubstr json, Tree *t ) { RYML_CHECK(t); substr src = t->copy_to_arena(json); parse_json_in_place(parser, filename, src, t, t->root_id()); } +void parse_json_in_arena(Parser *parser, csubstr json, Tree *t ) { RYML_CHECK(t); substr src = t->copy_to_arena(json); parse_json_in_place(parser, {} , src, t, t->root_id()); } +void parse_json_in_arena(Parser *parser, csubstr filename, csubstr json, NodeRef node ) { RYML_CHECK(!node.invalid()); substr src = node.tree()->copy_to_arena(json); parse_json_in_place(parser, filename, src, node.tree(), node.id()); } +void parse_json_in_arena(Parser *parser, csubstr json, NodeRef node ) { RYML_CHECK(!node.invalid()); substr src = node.tree()->copy_to_arena(json); parse_json_in_place(parser, {} , src, node.tree(), node.id()); } +Tree parse_json_in_arena(Parser *parser, csubstr filename, csubstr json ) { RYML_CHECK(parser); RYML_CHECK(parser->m_evt_handler); Tree tree(parser->callbacks()); substr src = tree.copy_to_arena(json); parse_json_in_place(parser, filename, src, &tree, tree.root_id()); return tree; } +Tree parse_json_in_arena(Parser *parser, csubstr json ) { RYML_CHECK(parser); RYML_CHECK(parser->m_evt_handler); Tree tree(parser->callbacks()); substr src = tree.copy_to_arena(json); parse_json_in_place(parser, {} , src, &tree, tree.root_id()); return tree; } + +// 
this is vertically aligned to highlight the parameter differences. +void parse_json_in_arena(csubstr filename, csubstr json, Tree *t, id_type node_id) { RYML_CHECK(t); Parser::handler_type event_handler(t->callbacks()); Parser parser(&event_handler); substr src = t->copy_to_arena(json); parse_json_in_place(&parser, filename, src, t, node_id); } +void parse_json_in_arena( csubstr json, Tree *t, id_type node_id) { RYML_CHECK(t); Parser::handler_type event_handler(t->callbacks()); Parser parser(&event_handler); substr src = t->copy_to_arena(json); parse_json_in_place(&parser, {} , src, t, node_id); } +void parse_json_in_arena(csubstr filename, csubstr json, Tree *t ) { RYML_CHECK(t); Parser::handler_type event_handler(t->callbacks()); Parser parser(&event_handler); substr src = t->copy_to_arena(json); parse_json_in_place(&parser, filename, src, t, t->root_id()); } +void parse_json_in_arena( csubstr json, Tree *t ) { RYML_CHECK(t); Parser::handler_type event_handler(t->callbacks()); Parser parser(&event_handler); substr src = t->copy_to_arena(json); parse_json_in_place(&parser, {} , src, t, t->root_id()); } +void parse_json_in_arena(csubstr filename, csubstr json, NodeRef node ) { RYML_CHECK(!node.invalid()); Parser::handler_type event_handler(node.tree()->callbacks()); Parser parser(&event_handler); substr src = node.tree()->copy_to_arena(json); parse_json_in_place(&parser, filename, src, node.tree(), node.id()); } +void parse_json_in_arena( csubstr json, NodeRef node ) { RYML_CHECK(!node.invalid()); Parser::handler_type event_handler(node.tree()->callbacks()); Parser parser(&event_handler); substr src = node.tree()->copy_to_arena(json); parse_json_in_place(&parser, {} , src, node.tree(), node.id()); } +Tree parse_json_in_arena(csubstr filename, csubstr json ) { Parser::handler_type event_handler; Parser parser(&event_handler); Tree tree(parser.callbacks()); substr src = tree.copy_to_arena(json); parse_json_in_place(&parser, filename, src, &tree, tree.root_id()); 
return tree; } +Tree parse_json_in_arena( csubstr json ) { Parser::handler_type event_handler; Parser parser(&event_handler); Tree tree(parser.callbacks()); substr src = tree.copy_to_arena(json); parse_json_in_place(&parser, {} , src, &tree, tree.root_id()); return tree; } + + +RYML_EXPORT C4_NO_INLINE size_t _find_last_newline_and_larger_indentation(csubstr s, size_t indentation) noexcept +{ + if(indentation + 1 > s.len) + return npos; + for(size_t i = s.len-indentation-1; i != size_t(-1); --i) + { + if(s.str[i] == '\n') + { + csubstr rem = s.sub(i + 1); + size_t first = rem.first_not_of(' '); + first = (first != npos) ? first : rem.len; + if(first > indentation) + return i; + } + } + return npos; +} + +//----------------------------------------------------------------------------- + +RYML_EXPORT id_type estimate_tree_capacity(csubstr src) { - _apply_seed(); - csubstr encoded = this->to_arena(w); - this->set_val(encoded); - return encoded.len; + id_type num_nodes = 1; // root + for(size_t i = 0; i < src.len; ++i) + { + const char c = src.str[i]; + num_nodes += (c == '\n') || (c == ',') || (c == '[') || (c == '{'); + } + return num_nodes; } } // namespace yml @@ -32933,126 +41883,59 @@ size_t NodeRef::set_val_serialized(c4::fmt::const_base64_wrapper w) #endif /* RYML_SINGLE_HDR_DEFINE_NOW */ -// (end https://github.com/biojppm/rapidyaml/src/c4/yml/node.cpp) +// (end https://github.com/biojppm/rapidyaml/src/c4/yml/parse.cpp) //******************************************************************************** //-------------------------------------------------------------------------------- -// src/c4/yml/preprocess.hpp -// https://github.com/biojppm/rapidyaml/src/c4/yml/preprocess.hpp +// src/c4/yml/node.cpp +// https://github.com/biojppm/rapidyaml/src/c4/yml/node.cpp //-------------------------------------------------------------------------------- //******************************************************************************** -#ifndef _C4_YML_PREPROCESS_HPP_ 
-#define _C4_YML_PREPROCESS_HPP_ - -/** @file preprocess.hpp Functions for preprocessing YAML prior to parsing. */ - -/** @defgroup Preprocessors Preprocessor functions - * - * These are the existing preprocessors: - * - * @code{.cpp} - * size_t preprocess_json(csubstr json, substr buf) - * size_t preprocess_rxmap(csubstr json, substr buf) - * @endcode - */ - -#ifndef _C4_YML_COMMON_HPP_ -//included above: -//#include "./common.hpp" -#endif +#ifdef RYML_SINGLE_HDR_DEFINE_NOW // amalgamate: removed include of -// https://github.com/biojppm/rapidyaml/src/c4/substr.hpp -//#include -#if !defined(C4_SUBSTR_HPP_) && !defined(_C4_SUBSTR_HPP_) -#error "amalgamate: file c4/substr.hpp must have been included at this point" -#endif /* C4_SUBSTR_HPP_ */ - +// https://github.com/biojppm/rapidyaml/src/c4/yml/node.hpp +//#include "c4/yml/node.hpp" +#if !defined(C4_YML_NODE_HPP_) && !defined(_C4_YML_NODE_HPP_) +#error "amalgamate: file c4/yml/node.hpp must have been included at this point" +#endif /* C4_YML_NODE_HPP_ */ namespace c4 { namespace yml { -namespace detail { -using Preprocessor = size_t(csubstr, substr); -template -substr preprocess_into_container(csubstr input, CharContainer *out) -{ - // try to write once. the preprocessor will stop writing at the end of - // the container, but will process all the input to determine the - // required container size. - size_t sz = PP(input, to_substr(*out)); - // if the container size is not enough, resize, and run again in the - // resized container - if(sz > out->size()) - { - out->resize(sz); - sz = PP(input, to_substr(*out)); - } - return to_substr(*out).first(sz); -} -} // namespace detail - - -//----------------------------------------------------------------------------- - -/** @name preprocess_rxmap - * Convert flow-type relaxed maps (with implicit bools) into strict YAML - * flow map. 
- * - * @code{.yaml} - * {a, b, c, d: [e, f], g: {a, b}} - * # is converted into this: - * {a: 1, b: 1, c: 1, d: [e, f], g: {a, b}} - * @endcode - - * @note this is NOT recursive - conversion happens only in the top-level map - * @param rxmap A relaxed map - * @param buf output buffer - * @param out output container - */ -//@{ -/** Write into a given output buffer. This function is safe to call with - * empty or small buffers; it won't write beyond the end of the buffer. - * - * @return the number of characters required for output - */ -RYML_EXPORT size_t preprocess_rxmap(csubstr rxmap, substr buf); +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- -/** Write into an existing container. It is resized to contained the output. - * @return a substr of the container - * @overload preprocess_rxmap */ -template -substr preprocess_rxmap(csubstr rxmap, CharContainer *out) +size_t NodeRef::set_key_serialized(c4::fmt::const_base64_wrapper w) { - return detail::preprocess_into_container(rxmap, out); + _apply_seed(); + csubstr encoded = this->to_arena(w); + this->set_key(encoded); + return encoded.len; } - -/** Create a container with the result. 
- * @overload preprocess_rxmap */ -template -CharContainer preprocess_rxmap(csubstr rxmap) +size_t NodeRef::set_val_serialized(c4::fmt::const_base64_wrapper w) { - CharContainer out; - preprocess_rxmap(rxmap, &out); - return out; + _apply_seed(); + csubstr encoded = this->to_arena(w); + this->set_val(encoded); + return encoded.len; } -//@} - } // namespace yml } // namespace c4 -#endif /* _C4_YML_PREPROCESS_HPP_ */ +#endif /* RYML_SINGLE_HDR_DEFINE_NOW */ -// (end https://github.com/biojppm/rapidyaml/src/c4/yml/preprocess.hpp) +// (end https://github.com/biojppm/rapidyaml/src/c4/yml/node.cpp) @@ -33084,6 +41967,7 @@ CharContainer preprocess_rxmap(csubstr rxmap) namespace c4 { namespace yml { +C4_SUPPRESS_WARNING_GCC_CLANG_WITH_PUSH("-Wold-style-cast") //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- @@ -33183,6 +42067,7 @@ size_t preprocess_rxmap(csubstr s, substr buf) return writer.pos; } +C4_SUPPRESS_WARNING_GCC_CLANG_POP } // namespace yml } // namespace c4 @@ -33226,7 +42111,7 @@ namespace c4 { namespace yml { -void check_invariants(Tree const& t, size_t node=NONE); +void check_invariants(Tree const& t, id_type node=NONE); void check_free_list(Tree const& t); void check_arena(Tree const& t); @@ -33235,7 +42120,7 @@ void check_arena(Tree const& t); //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- -inline void check_invariants(Tree const& t, size_t node) +inline void check_invariants(Tree const& t, id_type node) { if(node == NONE) { @@ -33243,8 +42128,8 @@ inline void check_invariants(Tree const& t, size_t node) node = t.root_id(); } - auto const& n = *t._p(node); -#ifdef RYML_DBG + NodeData const& n = *t._p(node); +#if defined(RYML_DBG) && 0 if(n.m_first_child != NONE || n.m_last_child != NONE) { printf("check(%zu): fc=%zu lc=%zu\n", node, 
n.m_first_child, n.m_last_child); @@ -33309,10 +42194,10 @@ inline void check_invariants(Tree const& t, size_t node) C4_CHECK(t._p(n.m_next_sibling)->m_next_sibling != node); } - size_t count = 0; - for(size_t i = n.m_first_child; i != NONE; i = t.next_sibling(i)) + id_type count = 0; + for(id_type i = n.m_first_child; i != NONE; i = t.next_sibling(i)) { -#ifdef RYML_DBG +#if defined(RYML_DBG) && 0 printf("check(%zu): descend to child[%zu]=%zu\n", node, count, i); #endif auto const& ch = *t._p(i); @@ -33340,7 +42225,7 @@ inline void check_invariants(Tree const& t, size_t node) check_arena(t); } - for(size_t i = t.first_child(node); i != NONE; i = t.next_sibling(i)) + for(id_type i = t.first_child(node); i != NONE; i = t.next_sibling(i)) { check_invariants(t, i); } @@ -33368,8 +42253,8 @@ inline void check_free_list(Tree const& t) //C4_CHECK(head.m_prev_sibling == NONE); //C4_CHECK(tail.m_next_sibling == NONE); - size_t count = 0; - for(size_t i = t.m_free_head, prev = NONE; i != NONE; i = t._p(i)->m_next_sibling) + id_type count = 0; + for(id_type i = t.m_free_head, prev = NONE; i != NONE; i = t._p(i)->m_next_sibling) { auto const& elm = *t._p(i); if(&elm != &head) @@ -33438,19 +42323,76 @@ inline void check_arena(Tree const& t) #endif /* C4_YML_NODE_HPP_ */ +#ifdef RYML_DBG +#define _c4dbg_tree(...) print_tree(__VA_ARGS__) +#define _c4dbg_node(...) print_tree(__VA_ARGS__) +#else +#define _c4dbg_tree(...) +#define _c4dbg_node(...) 
+#endif namespace c4 { namespace yml { +C4_SUPPRESS_WARNING_GCC_CLANG_WITH_PUSH("-Wold-style-cast") +C4_SUPPRESS_WARNING_GCC("-Wuseless-cast") -inline size_t print_node(Tree const& p, size_t node, int level, size_t count, bool print_children) +inline const char* _container_style_code(Tree const& p, id_type node) +{ + if(p.is_container(node)) + { + if(p._p(node)->m_type & (FLOW_SL|FLOW_ML)) + { + return "[FLOW]"; + } + if(p._p(node)->m_type & (BLOCK)) + { + return "[BLCK]"; + } + } + return ""; +} +inline char _scalar_code(NodeType masked) +{ + if(masked & (KEY_LITERAL|VAL_LITERAL)) + return '|'; + if(masked & (KEY_FOLDED|VAL_FOLDED)) + return '>'; + if(masked & (KEY_SQUO|VAL_SQUO)) + return '\''; + if(masked & (KEY_DQUO|VAL_DQUO)) + return '"'; + if(masked & (KEY_PLAIN|VAL_PLAIN)) + return '~'; + return '@'; +} +inline char _scalar_code_key(NodeType t) +{ + return _scalar_code(t & KEY_STYLE); +} +inline char _scalar_code_val(NodeType t) +{ + return _scalar_code(t & VAL_STYLE); +} +inline char _scalar_code_key(Tree const& p, id_type node) +{ + return _scalar_code_key(p._p(node)->m_type); +} +inline char _scalar_code_val(Tree const& p, id_type node) +{ + return _scalar_code_key(p._p(node)->m_type); +} +inline id_type print_node(Tree const& p, id_type node, int level, id_type count, bool print_children) { - printf("[%zd]%*s[%zd] %p", count, (2*level), "", node, (void*)p.get(node)); + printf("[%zu]%*s[%zu] %p", (size_t)count, (2*level), "", (size_t)node, (void const*)p.get(node)); if(p.is_root(node)) { printf(" [ROOT]"); } - printf(" %s:", p.type_str(node)); + char typebuf[128]; + csubstr typestr = p.type(node).type_str(typebuf); + RYML_CHECK(typestr.str); + printf(" %.*s", (int)typestr.len, typestr.str); if(p.has_key(node)) { if(p.has_key_anchor(node)) @@ -33461,65 +42403,47 @@ inline size_t print_node(Tree const& p, size_t node, int level, size_t count, bo if(p.has_key_tag(node)) { csubstr kt = p.key_tag(node); - csubstr k = p.key(node); - printf(" %.*s '%.*s'", 
(int)kt.len, kt.str, (int)k.len, k.str); + printf(" <%.*s>", (int)kt.len, kt.str); } - else - { - csubstr k = p.key(node); - printf(" '%.*s'", (int)k.len, k.str); - } - } - else - { - RYML_ASSERT( ! p.has_key_tag(node)); + const char code = _scalar_code_key(p, node); + csubstr k = p.key(node); + printf(" %c%.*s%c :", code, (int)k.len, k.str, code); } - if(p.has_val(node)) + if(p.has_val_anchor(node)) { - if(p.has_val_tag(node)) - { - csubstr vt = p.val_tag(node); - csubstr v = p.val(node); - printf(" %.*s '%.*s'", (int)vt.len, vt.str, (int)v.len, v.str); - } - else - { - csubstr v = p.val(node); - printf(" '%.*s'", (int)v.len, v.str); - } + csubstr a = p.val_anchor(node); + printf(" &%.*s'", (int)a.len, a.str); } - else + if(p.has_val_tag(node)) { - if(p.has_val_tag(node)) - { - csubstr vt = p.val_tag(node); - printf(" %.*s", (int)vt.len, vt.str); - } + csubstr vt = p.val_tag(node); + printf(" <%.*s>", (int)vt.len, vt.str); } - if(p.has_val_anchor(node)) + if(p.has_val(node)) { - auto &a = p.val_anchor(node); - printf(" valanchor='&%.*s'", (int)a.len, a.str); + const char code = _scalar_code_val(p, node); + csubstr v = p.val(node); + printf(" %c%.*s%c", code, (int)v.len, v.str, code); } - printf(" (%zd sibs)", p.num_siblings(node)); + printf(" (%zu sibs)", (size_t)p.num_siblings(node)); ++count; - if(p.is_container(node)) + if(!p.is_container(node)) + { + printf("\n"); + } + else { - printf(" %zd children:\n", p.num_children(node)); + printf(" (%zu children)\n", (size_t)p.num_children(node)); if(print_children) { - for(size_t i = p.first_child(node); i != NONE; i = p.next_sibling(i)) + for(id_type i = p.first_child(node); i != NONE; i = p.next_sibling(i)) { count = print_node(p, i, level+1, count, print_children); } } } - else - { - printf("\n"); - } return count; } @@ -33539,21 +42463,38 @@ inline void print_node(ConstNodeRef const& p, int level=0) //----------------------------------------------------------------------------- 
//----------------------------------------------------------------------------- -inline size_t print_tree(Tree const& p, size_t node=NONE) +inline id_type print_tree(const char *message, Tree const& p, id_type node=NONE) { printf("--------------------------------------\n"); - size_t ret = 0; + if(message != nullptr) + printf("%s:\n", message); + id_type ret = 0; if(!p.empty()) { if(node == NONE) node = p.root_id(); ret = print_node(p, node, 0, 0, true); } - printf("#nodes=%zd vs #printed=%zd\n", p.size(), ret); + printf("#nodes=%zu vs #printed=%zu\n", (size_t)p.size(), (size_t)ret); printf("--------------------------------------\n"); return ret; } +inline id_type print_tree(Tree const& p, id_type node=NONE) +{ + return print_tree(nullptr, p, node); +} + +inline void print_tree(ConstNodeRef const& p, int level) +{ + print_node(p, level); + for(ConstNodeRef ch : p.children()) + { + print_tree(ch, level+1); + } +} + +C4_SUPPRESS_WARNING_GCC_CLANG_POP } /* namespace yml */ } /* namespace c4 */ @@ -33576,6 +42517,13 @@ inline size_t print_tree(Tree const& p, size_t node=NONE) #ifndef _C4_YML_YML_HPP_ #define _C4_YML_YML_HPP_ +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/version.hpp +//#include "c4/yml/version.hpp" +#if !defined(C4_YML_VERSION_HPP_) && !defined(_C4_YML_VERSION_HPP_) +#error "amalgamate: file c4/yml/version.hpp must have been included at this point" +#endif /* C4_YML_VERSION_HPP_ */ + // amalgamate: removed include of // https://github.com/biojppm/rapidyaml/src/c4/yml/tree.hpp //#include "c4/yml/tree.hpp" @@ -33597,6 +42545,27 @@ inline size_t print_tree(Tree const& p, size_t node=NONE) #error "amalgamate: file c4/yml/emit.hpp must have been included at this point" #endif /* C4_YML_EMIT_HPP_ */ +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/event_handler_tree.hpp +//#include "c4/yml/event_handler_tree.hpp" +#if !defined(C4_YML_EVENT_HANDLER_TREE_HPP_) && 
!defined(_C4_YML_EVENT_HANDLER_TREE_HPP_) +#error "amalgamate: file c4/yml/event_handler_tree.hpp must have been included at this point" +#endif /* C4_YML_EVENT_HANDLER_TREE_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/parse_engine.hpp +//#include "c4/yml/parse_engine.hpp" +#if !defined(C4_YML_PARSE_ENGINE_HPP_) && !defined(_C4_YML_PARSE_ENGINE_HPP_) +#error "amalgamate: file c4/yml/parse_engine.hpp must have been included at this point" +#endif /* C4_YML_PARSE_ENGINE_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/filter_processor.hpp +//#include "c4/yml/filter_processor.hpp" +#if !defined(C4_YML_FILTER_PROCESSOR_HPP_) && !defined(_C4_YML_FILTER_PROCESSOR_HPP_) +#error "amalgamate: file c4/yml/filter_processor.hpp must have been included at this point" +#endif /* C4_YML_FILTER_PROCESSOR_HPP_ */ + // amalgamate: removed include of // https://github.com/biojppm/rapidyaml/src/c4/yml/parse.hpp //#include "c4/yml/parse.hpp" @@ -33611,6 +42580,20 @@ inline size_t print_tree(Tree const& p, size_t node=NONE) #error "amalgamate: file c4/yml/preprocess.hpp must have been included at this point" #endif /* C4_YML_PREPROCESS_HPP_ */ +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/reference_resolver.hpp +//#include "c4/yml/reference_resolver.hpp" +#if !defined(C4_YML_REFERENCE_RESOLVER_HPP_) && !defined(_C4_YML_REFERENCE_RESOLVER_HPP_) +#error "amalgamate: file c4/yml/reference_resolver.hpp must have been included at this point" +#endif /* C4_YML_REFERENCE_RESOLVER_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/tag.hpp +//#include "c4/yml/tag.hpp" +#if !defined(C4_YML_TAG_HPP_) && !defined(_C4_YML_TAG_HPP_) +#error "amalgamate: file c4/yml/tag.hpp must have been included at this point" +#endif /* C4_YML_TAG_HPP_ */ + #endif // _C4_YML_YML_HPP_