diff --git a/README.md b/README.md index fcbefb1..319728b 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ![Library Status](https://raw.githubusercontent.com/bemanproject/beman/refs/heads/main/images/badges/beman_badge-beman_library_under_development.svg) ![Continuous Integration Tests](https://github.com/dascandy/cstring_view/actions/workflows/ci_tests.yml/badge.svg) ![Lint Check (pre-commit)](https://github.com/dascandy/cstring_view/actions/workflows/pre-commit.yml/badge.svg) -`beman.cstring_view` is a header-only `cstring_view` library. +`beman.cstring_view` is a header-only `cstring_view` library. **Implements**: `std::cstring_view` proposed in [cstring\_view (P3655R2)](https://wg21.link/P3655R2). @@ -15,7 +15,8 @@ SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ## Usage -`std::cstring_view` exposes a string\_view like type that is intended for being able to propagate prior knowledge that a string is null-terminated throughout the type system, while fulfilling the same role as string\_view. +`std::cstring_view` exposes a string\_view like type that is intended for being able to propagate prior knowledge that +a string is null-terminated throughout the type system, while fulfilling the same role as string\_view. ### Usage: default projection in constrained algorithms @@ -27,8 +28,7 @@ The following code snippet illustrates how we can use `cstring_view` to make a b int main(int argc, const char** argv) { std::vector args(argv, argv+argc); - -} +} ``` Full runnable examples can be found in [`examples/`](examples/). diff --git a/docs/using_cstring_view.md b/docs/using_cstring_view.md index 7ae2452..14daebd 100644 --- a/docs/using_cstring_view.md +++ b/docs/using_cstring_view.md @@ -1,28 +1,30 @@ -# Using cstring_view +# Using `cstring_view` -The cstring_view proposal creates a new type that is a null-terminated string view. To use the library in your code, add the project in your cmakefiles: +The `cstring_view` proposal creates a new type that is a null-terminated string view. To use the library in your code, +add the project in your cmakefiles: ```cmake add_subdirectory(beman/cstring_view) ``` -and add the beman::cstring_view project to your target +and add the `beman::cstring_view` project to your target ```cmake target_link_libraries(my_project PRIVATE beman/cstring_view) ``` -Subsequently include the cstring_view header in your code +Subsequently include the `cstring_view` header in your code ```cpp #include ``` -From this point on, you're able to use the cstring_view type to handle cases where you have NUL-terminated strings. +From this point on, you're able to use the `cstring_view` type to handle cases where you have NUL-terminated strings. -# Simple examples +## Simple examples -Creating a `cstring_view` can be done from a `std::string`, from a string literal, or from a pointer or buffer that is known to contain a NUL-terminated string. For example: +Creating a `cstring_view` can be done from a `std::string`, from a string literal, or from a pointer or buffer that is +known to contain a NUL-terminated string. For example: ```cpp cstring_view v = "Hello World!"; @@ -35,5 +37,3 @@ Such a `cstring_view` can be used in places where a NUL-terminated string is exp ```cpp puts(v.c_str()); ``` - - diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 5222fbf..857a14a 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -10,7 +10,10 @@ message("Examples to be built: ${ALL_EXAMPLES}") foreach(example ${ALL_EXAMPLES}) add_executable(beman.cstring_view.examples.${example}) - target_sources(beman.cstring_view.examples.${example} PRIVATE ${example}.cpp) + target_sources( + beman.cstring_view.examples.${example} + PRIVATE ${example}.cpp + ) target_link_libraries( beman.cstring_view.examples.${example} PRIVATE beman::cstring_view diff --git a/examples/example.cpp b/examples/example.cpp index 58bbd0f..5811bc2 100644 --- a/examples/example.cpp +++ b/examples/example.cpp @@ -23,7 +23,7 @@ std::string_view to_string(std::strong_ordering order) { } int main() { - std::string s = "hello world"; + std::string s = "hello world"; beman::cstring_view z0 = "hello"; beman::cstring_view z1 = s; beman::cstring_view empty; @@ -44,8 +44,8 @@ int main() { std::cout << z0.c_str() << "\n"; std::cout << "\"" << empty << "\"\n"; std::cout << (empty == ""_csv) << "\n"; - - std::wstring ws = L"hello world"; + + std::wstring ws = L"hello world"; beman::wcstring_view wz0 = L"hello"; beman::wcstring_view wz1 = ws; beman::wcstring_view wempty; @@ -55,7 +55,7 @@ int main() { std::cout << wz0.starts_with(L"hello") << "\n"; std::cout << wz0.starts_with(L"hello"_csv) << "\n"; std::cout << std::hash{}(wz1) << "\n"; - std::wcout< namespace beman { - // [cstring.view.template], class template basic_cstring_view - template> - class basic_cstring_view; // partially freestanding + +// [cstring.view.template], class template basic_cstring_view +template > +class basic_cstring_view; // partially freestanding /* #if __cpp_lib_ranges >= 201911L - namespace ranges { - template - constexpr bool enable_view> = true; - template - constexpr bool enable_borrowed_range> = true; - } +namespace ranges { + template + constexpr bool enable_view> = true; + template + constexpr bool enable_borrowed_range> = true; +} #endif */ - // [cstring.view.comparison], non-member comparison functions - template - constexpr bool operator==( - basic_cstring_view x, - std::type_identity_t> y - ) noexcept; +// [cstring.view.comparison], non-member comparison functions +template +constexpr bool operator==(basic_cstring_view x, + std::type_identity_t> y) noexcept; - template - constexpr auto operator<=>( - basic_cstring_view x, - std::type_identity_t> y - ) noexcept; +template +constexpr auto operator<=>(basic_cstring_view x, + std::type_identity_t> y) noexcept; - // [cstring.view.io], inserters and extractors - template - std::basic_ostream& operator<<(std::basic_ostream& os, basic_cstring_view str); - - // basic_cstring_view typedef-names - using cstring_view = basic_cstring_view; - using u8cstring_view = basic_cstring_view; - using u16cstring_view = basic_cstring_view; - using u32cstring_view = basic_cstring_view; - using wcstring_view = basic_cstring_view; - - // [cstring.view.hash], hash support - template struct hash; - template<> struct hash; - template<> struct hash; - template<> struct hash; - template<> struct hash; - template<> struct hash; - - inline namespace literals { - inline namespace cstring_view_literals { - #ifndef _MSC_VER - #pragma GCC diagnostic push - #ifdef __clang__ - #pragma GCC diagnostic ignored "-Wuser-defined-literals" - #else - #pragma GCC diagnostic ignored "-Wliteral-suffix" - #endif - #else - #pragma warning(push) - #pragma warning(disable: 4455) - #endif - // [cstring.view.literals], suffix for basic_cstring_view literals - constexpr cstring_view operator"" csv(const char* str, size_t len) noexcept; - constexpr u8cstring_view operator"" csv(const char8_t* str, size_t len) noexcept; - constexpr u16cstring_view operator"" csv(const char16_t* str, size_t len) noexcept; - constexpr u32cstring_view operator"" csv(const char32_t* str, size_t len) noexcept; - constexpr wcstring_view operator"" csv(const wchar_t* str, size_t len) noexcept; - #ifndef _MSC_VER - #pragma GCC diagnostic pop - #else - #pragma warning(pop) - #endif - } - } -} +// [cstring.view.io], inserters and extractors +template +std::basic_ostream& operator<<(std::basic_ostream& os, + basic_cstring_view str); -namespace beman { - template */> - class basic_cstring_view { - public: - // types - using traits_type = traits; - using value_type = charT; - using pointer = value_type*; - using const_pointer = const value_type*; - using reference = value_type&; - using const_reference = const value_type&; - using const_iterator = const charT*; // see [cstring.view.iterators] - using iterator = const_iterator; - using const_reverse_iterator = std::reverse_iterator; - using reverse_iterator = const_reverse_iterator; - using size_type = size_t; - using difference_type = ptrdiff_t; - static constexpr size_type npos = size_type(-1); - - // [cstring.view.cons], construction and assignment - constexpr basic_cstring_view() noexcept : size_() { - static const charT empty_string[1]{}; - data_ = std::data(empty_string); - } - constexpr basic_cstring_view(const basic_cstring_view&) noexcept = default; - constexpr basic_cstring_view& operator=(const basic_cstring_view&) noexcept = default; - constexpr basic_cstring_view(const charT* str) : basic_cstring_view(str, traits::length(str)) {} - constexpr basic_cstring_view(const charT* str, size_type len) : data_(str), size_(len) { - assert(str[len] == charT()); - } - constexpr basic_cstring_view(std::nullptr_t) = delete; +// basic_cstring_view typedef-names +using cstring_view = basic_cstring_view; +using u8cstring_view = basic_cstring_view; +using u16cstring_view = basic_cstring_view; +using u32cstring_view = basic_cstring_view; +using wcstring_view = basic_cstring_view; - // NOTE: Not part of proposal, just to make examples work since I can't add the conversion operator to - // basic_string. - template - constexpr basic_cstring_view(const std::basic_string& str) - : basic_cstring_view(str.c_str(), str.size()) {} +// [cstring.view.hash], hash support +template +struct hash; +template <> +struct hash; +template <> +struct hash; +template <> +struct hash; +template <> +struct hash; +template <> +struct hash; - // [cstring.view.iterators], iterator support - constexpr const_iterator begin() const noexcept { - return data_; - } - constexpr const_iterator end() const noexcept { - return data_ + size_; - } - constexpr const_iterator cbegin() const noexcept { - return begin(); - } - constexpr const_iterator cend() const noexcept { - return end(); - } - constexpr const_reverse_iterator rbegin() const noexcept { - return data_ + size_; - } - constexpr const_reverse_iterator rend() const noexcept { - return data_; - } - constexpr const_reverse_iterator crbegin() const noexcept { - return rbegin(); - } - constexpr const_reverse_iterator crend() const noexcept { - return rend(); - } +inline namespace literals { +inline namespace cstring_view_literals { +#ifndef _MSC_VER +#pragma GCC diagnostic push +#ifdef __clang__ +#pragma GCC diagnostic ignored "-Wuser-defined-literals" +#else +#pragma GCC diagnostic ignored "-Wliteral-suffix" +#endif +#else +#pragma warning(push) +#pragma warning(disable : 4455) +#endif +// [cstring.view.literals], suffix for basic_cstring_view literals +constexpr cstring_view operator"" csv(const char* str, size_t len) noexcept; +constexpr u8cstring_view operator"" csv(const char8_t* str, size_t len) noexcept; +constexpr u16cstring_view operator"" csv(const char16_t* str, size_t len) noexcept; +constexpr u32cstring_view operator"" csv(const char32_t* str, size_t len) noexcept; +constexpr wcstring_view operator"" csv(const wchar_t* str, size_t len) noexcept; +#ifndef _MSC_VER +#pragma GCC diagnostic pop +#else +#pragma warning(pop) +#endif +} // namespace cstring_view_literals +} // namespace literals +} // namespace beman - // [cstring.view.capacity], capacity - constexpr size_type size() const noexcept { - return size_; - } - constexpr size_type length() const noexcept { - return size_; - } - constexpr size_type max_size() const noexcept { - return std::basic_string_view{}.max_size() - 1; - } - [[nodiscard]] constexpr bool empty() const noexcept { - return size_ == 0; - } +namespace beman { - // [cstring.view.access], element access - constexpr const_reference operator[](size_type pos) const { - assert(pos <= size_); - return data_[pos]; - } - constexpr const_reference at(size_type pos) const { - if(pos > size_) { - throw std::out_of_range(std::format("basic_cstring_view::at: pos ({}) > size() {}", pos, size_)); - } - return data_[pos]; - } - constexpr const_reference front() const { - assert(!empty()); - return data_[0]; - } - constexpr const_reference back() const { - assert(!empty()); - return data_[size_ - 1]; - } - constexpr const_pointer data() const noexcept { - return data_; - } - constexpr const_pointer c_str() const noexcept { - return data_; - } +template */> +class basic_cstring_view { + public: + // types + using traits_type = traits; + using value_type = charT; + using pointer = value_type*; + using const_pointer = const value_type*; + using reference = value_type&; + using const_reference = const value_type&; + using const_iterator = const charT*; // see [cstring.view.iterators] + using iterator = const_iterator; + using const_reverse_iterator = std::reverse_iterator; + using reverse_iterator = const_reverse_iterator; + using size_type = size_t; + using difference_type = ptrdiff_t; + static constexpr size_type npos = size_type(-1); - operator std::basic_string_view() const noexcept { - return std::basic_string_view{data_, size_}; - } + // [cstring.view.cons], construction and assignment + constexpr basic_cstring_view() noexcept : size_() { + static const charT empty_string[1]{}; + data_ = std::data(empty_string); + } + constexpr basic_cstring_view(const basic_cstring_view&) noexcept = default; + constexpr basic_cstring_view& operator=(const basic_cstring_view&) noexcept = default; + constexpr basic_cstring_view(const charT* str) : basic_cstring_view(str, traits::length(str)) {} + constexpr basic_cstring_view(const charT* str, size_type len) : data_(str), size_(len) { + assert(str[len] == charT()); + } + constexpr basic_cstring_view(std::nullptr_t) = delete; - // [cstring.view.modifiers], modifiers - constexpr void remove_prefix(size_type n) { - assert(n <= size()); - data_ += n; - size_ -= n; - } - constexpr void swap(basic_cstring_view& s) noexcept { - std::swap(data_, s.data_); - std::swap(size_, s.size_); - } + // NOTE: Not part of proposal, just to make examples work since I can't add the conversion operator to + // basic_string. + template + constexpr basic_cstring_view(const std::basic_string& str) + : basic_cstring_view(str.c_str(), str.size()) {} - // [cstring.view.ops], cstring operations - constexpr size_type copy(charT* s, size_type n, size_type pos = 0) const { - return std::basic_string_view(*this).copy(s, n, pos); - } + // [cstring.view.iterators], iterator support + constexpr const_iterator begin() const noexcept { return data_; } + constexpr const_iterator end() const noexcept { return data_ + size_; } + constexpr const_iterator cbegin() const noexcept { return begin(); } + constexpr const_iterator cend() const noexcept { return end(); } + constexpr const_reverse_iterator rbegin() const noexcept { return data_ + size_; } + constexpr const_reverse_iterator rend() const noexcept { return data_; } + constexpr const_reverse_iterator crbegin() const noexcept { return rbegin(); } + constexpr const_reverse_iterator crend() const noexcept { return rend(); } - constexpr std::basic_string_view substr(size_type pos, size_type n) const { - return std::basic_string_view(*this).substr(pos, n); - } - constexpr basic_cstring_view substr(size_type pos = 0) const { - return { data_ + pos, size_ - pos }; - } + // [cstring.view.capacity], capacity + constexpr size_type size() const noexcept { return size_; } + constexpr size_type length() const noexcept { return size_; } + constexpr size_type max_size() const noexcept { return std::basic_string_view{}.max_size() - 1; } + [[nodiscard]] constexpr bool empty() const noexcept { return size_ == 0; } - constexpr int compare(std::basic_string_view s) const noexcept { - return std::basic_string_view(*this).compare(s); - } - constexpr int compare(size_type pos1, size_type n1, basic_cstring_view s) const { - return std::basic_string_view(*this).compare(pos1, n1, s); - } - constexpr int compare(size_type pos1, size_type n1, basic_cstring_view s, - size_type pos2, size_type n2) const { - return std::basic_string_view(*this).compare(pos1, n1, s, pos2, n2); - } - constexpr int compare(const charT* s) const { - return std::basic_string_view(*this).compare(s); - } - constexpr int compare(size_type pos1, size_type n1, const charT* s) const { - return std::basic_string_view(*this).compare(pos1, n1, s); - } - constexpr int compare(size_type pos1, size_type n1, const charT* s, size_type n2) const { - return std::basic_string_view(*this).compare(pos1, n1, s, n2); + // [cstring.view.access], element access + constexpr const_reference operator[](size_type pos) const { + assert(pos <= size_); + return data_[pos]; + } + constexpr const_reference at(size_type pos) const { + if (pos > size_) { + throw std::out_of_range(std::format("basic_cstring_view::at: pos ({}) > size() {}", pos, size_)); } + return data_[pos]; + } + constexpr const_reference front() const { + assert(!empty()); + return data_[0]; + } + constexpr const_reference back() const { + assert(!empty()); + return data_[size_ - 1]; + } + constexpr const_pointer data() const noexcept { return data_; } + constexpr const_pointer c_str() const noexcept { return data_; } - constexpr bool starts_with(std::basic_string_view x) const noexcept { - return std::basic_string_view(*this).starts_with(x); - } - constexpr bool starts_with(charT x) const noexcept { - return std::basic_string_view(*this).starts_with(x); - } - constexpr bool starts_with(const charT* x) const { - return std::basic_string_view(*this).starts_with(x); - } - constexpr bool ends_with(std::basic_string_view x) const noexcept { - return std::basic_string_view(*this).ends_with(x); - } - constexpr bool ends_with(charT x) const noexcept { - return std::basic_string_view(*this).ends_with(x); - } - constexpr bool ends_with(const charT* x) const { - return std::basic_string_view(*this).ends_with(x); - } + operator std::basic_string_view() const noexcept { + return std::basic_string_view{data_, size_}; + } - constexpr bool contains(std::basic_string_view x) const noexcept { - return std::basic_string_view(*this).contains(x); - } - constexpr bool contains(charT x) const noexcept { - return std::basic_string_view(*this).contains(x); - } - constexpr bool contains(const charT* x) const { - return std::basic_string_view(*this).contains(x); - } + // [cstring.view.modifiers], modifiers + constexpr void remove_prefix(size_type n) { + assert(n <= size()); + data_ += n; + size_ -= n; + } + constexpr void swap(basic_cstring_view& s) noexcept { + std::swap(data_, s.data_); + std::swap(size_, s.size_); + } - // [cstring.view.find], searching - constexpr size_type find(std::basic_string_view s, size_type pos = 0) const noexcept { - return std::basic_string_view(*this).find(s, pos); - } - constexpr size_type find(charT c, size_type pos = 0) const noexcept { - return std::basic_string_view(*this).find(c, pos); - } - constexpr size_type find(const charT* s, size_type pos, size_type n) const { - return std::basic_string_view(*this).find(s, pos, n); - } - constexpr size_type find(const charT* s, size_type pos = 0) const { - return std::basic_string_view(*this).find(s, pos); - } - constexpr size_type rfind(std::basic_string_view s, size_type pos = npos) const noexcept { - return std::basic_string_view(*this).rfind(s, pos); - } - constexpr size_type rfind(charT c, size_type pos = npos) const noexcept { - return std::basic_string_view(*this).rfind(c, pos); - } - constexpr size_type rfind(const charT* s, size_type pos, size_type n) const { - return std::basic_string_view(*this).rfind(s, pos, n); - } - constexpr size_type rfind(const charT* s, size_type pos = npos) const { - return std::basic_string_view(*this).rfind(s, pos); - } + // [cstring.view.ops], cstring operations + constexpr size_type copy(charT* s, size_type n, size_type pos = 0) const { + return std::basic_string_view(*this).copy(s, n, pos); + } - constexpr size_type find_first_of(std::basic_string_view s, size_type pos = 0) const noexcept { - return std::basic_string_view(*this).find_first_of(s, pos); - } - constexpr size_type find_first_of(charT c, size_type pos = 0) const noexcept { - return std::basic_string_view(*this).find_first_of(c, pos); - } - constexpr size_type find_first_of(const charT* s, size_type pos, size_type n) const { - return std::basic_string_view(*this).find_first_of(s, pos, n); - } - constexpr size_type find_first_of(const charT* s, size_type pos = 0) const { - return std::basic_string_view(*this).find_first_of(s, pos); - } - constexpr size_type find_last_of(std::basic_string_view s, size_type pos = npos) const noexcept { - return std::basic_string_view(*this).find_last_of(s, pos); - } - constexpr size_type find_last_of(charT c, size_type pos = npos) const noexcept { - return std::basic_string_view(*this).find_last_of(c, pos); - } - constexpr size_type find_last_of(const charT* s, size_type pos, size_type n) const { - return std::basic_string_view(*this).find_last_of(s, pos, n); - } - constexpr size_type find_last_of(const charT* s, size_type pos = npos) const { - return std::basic_string_view(*this).find_last_of(s, pos); - } - constexpr size_type find_first_not_of(std::basic_string_view s, size_type pos = 0) const noexcept { - return std::basic_string_view(*this).find_first_not_of(s, pos); - } - constexpr size_type find_first_not_of(charT c, size_type pos = 0) const noexcept { - return std::basic_string_view(*this).find_first_not_of(c, pos); - } - constexpr size_type find_first_not_of(const charT* s, size_type pos, size_type n) const { - return std::basic_string_view(*this).find_first_not_of(s, pos, n); - } - constexpr size_type find_first_not_of(const charT* s, size_type pos = 0) const { - return std::basic_string_view(*this).find_first_not_of(s, pos); - } - constexpr size_type find_last_not_of(std::basic_string_view s, size_type pos = npos) const noexcept { - return std::basic_string_view(*this).find_last_not_of(s, pos); - } - constexpr size_type find_last_not_of(charT c, size_type pos = npos) const noexcept { - return std::basic_string_view(*this).find_last_not_of(c, pos); - } - constexpr size_type find_last_not_of(const charT* s, size_type pos, size_type n) const { - return std::basic_string_view(*this).find_last_not_of(s, pos, n); - } - constexpr size_type find_last_not_of(const charT* s, size_type pos = npos) const { - return std::basic_string_view(*this).find_last_not_of(s, pos); - } + constexpr std::basic_string_view substr(size_type pos, size_type n) const { + return std::basic_string_view(*this).substr(pos, n); + } + constexpr basic_cstring_view substr(size_type pos = 0) const { return {data_ + pos, size_ - pos}; } - private: - const_pointer data_; // exposition only - size_type size_; // exposition only - }; - - inline namespace literals { - inline namespace cstring_view_literals { - // [cstring.view.literals], suffix for basic_cstring_view literals - constexpr cstring_view operator""_csv(const char* str, size_t len) noexcept { - return basic_cstring_view(str, len); - } - constexpr u8cstring_view operator""_csv(const char8_t* str, size_t len) noexcept { - return basic_cstring_view(str, len); - } - constexpr u16cstring_view operator""_csv(const char16_t* str, size_t len) noexcept { - return basic_cstring_view(str, len); - } - constexpr u32cstring_view operator""_csv(const char32_t* str, size_t len) noexcept { - return basic_cstring_view(str, len); - } - constexpr wcstring_view operator""_csv(const wchar_t* str, size_t len) noexcept { - return basic_cstring_view(str, len); - } - } + constexpr int compare(std::basic_string_view s) const noexcept { + return std::basic_string_view(*this).compare(s); + } + constexpr int compare(size_type pos1, size_type n1, basic_cstring_view s) const { + return std::basic_string_view(*this).compare(pos1, n1, s); + } + constexpr int compare(size_type pos1, size_type n1, basic_cstring_view s, size_type pos2, size_type n2) const { + return std::basic_string_view(*this).compare(pos1, n1, s, pos2, n2); + } + constexpr int compare(const charT* s) const { return std::basic_string_view(*this).compare(s); } + constexpr int compare(size_type pos1, size_type n1, const charT* s) const { + return std::basic_string_view(*this).compare(pos1, n1, s); + } + constexpr int compare(size_type pos1, size_type n1, const charT* s, size_type n2) const { + return std::basic_string_view(*this).compare(pos1, n1, s, n2); } - template - constexpr bool operator==( - basic_cstring_view x, - std::type_identity_t> y - ) noexcept { - return std::basic_string_view(x) == std::basic_string_view(y); + constexpr bool starts_with(std::basic_string_view x) const noexcept { + return std::basic_string_view(*this).starts_with(x); + } + constexpr bool starts_with(charT x) const noexcept { + return std::basic_string_view(*this).starts_with(x); + } + constexpr bool starts_with(const charT* x) const { + return std::basic_string_view(*this).starts_with(x); + } + constexpr bool ends_with(std::basic_string_view x) const noexcept { + return std::basic_string_view(*this).ends_with(x); + } + constexpr bool ends_with(charT x) const noexcept { + return std::basic_string_view(*this).ends_with(x); + } + constexpr bool ends_with(const charT* x) const { + return std::basic_string_view(*this).ends_with(x); } - template - constexpr auto operator<=>( - basic_cstring_view x, - std::type_identity_t> y - ) noexcept { - return std::basic_string_view(x) <=> std::basic_string_view(y); + constexpr bool contains(std::basic_string_view x) const noexcept { + return std::basic_string_view(*this).contains(x); + } + constexpr bool contains(charT x) const noexcept { + return std::basic_string_view(*this).contains(x); } + constexpr bool contains(const charT* x) const { return std::basic_string_view(*this).contains(x); } - template - std::basic_ostream& operator<<(std::basic_ostream& os, basic_cstring_view str) { - return os<(str); + // [cstring.view.find], searching + constexpr size_type find(std::basic_string_view s, size_type pos = 0) const noexcept { + return std::basic_string_view(*this).find(s, pos); + } + constexpr size_type find(charT c, size_type pos = 0) const noexcept { + return std::basic_string_view(*this).find(c, pos); + } + constexpr size_type find(const charT* s, size_type pos, size_type n) const { + return std::basic_string_view(*this).find(s, pos, n); + } + constexpr size_type find(const charT* s, size_type pos = 0) const { + return std::basic_string_view(*this).find(s, pos); + } + constexpr size_type rfind(std::basic_string_view s, size_type pos = npos) const noexcept { + return std::basic_string_view(*this).rfind(s, pos); + } + constexpr size_type rfind(charT c, size_type pos = npos) const noexcept { + return std::basic_string_view(*this).rfind(c, pos); + } + constexpr size_type rfind(const charT* s, size_type pos, size_type n) const { + return std::basic_string_view(*this).rfind(s, pos, n); + } + constexpr size_type rfind(const charT* s, size_type pos = npos) const { + return std::basic_string_view(*this).rfind(s, pos); } + constexpr size_type find_first_of(std::basic_string_view s, size_type pos = 0) const noexcept { + return std::basic_string_view(*this).find_first_of(s, pos); + } + constexpr size_type find_first_of(charT c, size_type pos = 0) const noexcept { + return std::basic_string_view(*this).find_first_of(c, pos); + } + constexpr size_type find_first_of(const charT* s, size_type pos, size_type n) const { + return std::basic_string_view(*this).find_first_of(s, pos, n); + } + constexpr size_type find_first_of(const charT* s, size_type pos = 0) const { + return std::basic_string_view(*this).find_first_of(s, pos); + } + constexpr size_type find_last_of(std::basic_string_view s, size_type pos = npos) const noexcept { + return std::basic_string_view(*this).find_last_of(s, pos); + } + constexpr size_type find_last_of(charT c, size_type pos = npos) const noexcept { + return std::basic_string_view(*this).find_last_of(c, pos); + } + constexpr size_type find_last_of(const charT* s, size_type pos, size_type n) const { + return std::basic_string_view(*this).find_last_of(s, pos, n); + } + constexpr size_type find_last_of(const charT* s, size_type pos = npos) const { + return std::basic_string_view(*this).find_last_of(s, pos); + } + constexpr size_type find_first_not_of(std::basic_string_view s, size_type pos = 0) const noexcept { + return std::basic_string_view(*this).find_first_not_of(s, pos); + } + constexpr size_type find_first_not_of(charT c, size_type pos = 0) const noexcept { + return std::basic_string_view(*this).find_first_not_of(c, pos); + } + constexpr size_type find_first_not_of(const charT* s, size_type pos, size_type n) const { + return std::basic_string_view(*this).find_first_not_of(s, pos, n); + } + constexpr size_type find_first_not_of(const charT* s, size_type pos = 0) const { + return std::basic_string_view(*this).find_first_not_of(s, pos); + } + constexpr size_type find_last_not_of(std::basic_string_view s, + size_type pos = npos) const noexcept { + return std::basic_string_view(*this).find_last_not_of(s, pos); + } + constexpr size_type find_last_not_of(charT c, size_type pos = npos) const noexcept { + return std::basic_string_view(*this).find_last_not_of(c, pos); + } + constexpr size_type find_last_not_of(const charT* s, size_type pos, size_type n) const { + return std::basic_string_view(*this).find_last_not_of(s, pos, n); + } + constexpr size_type find_last_not_of(const charT* s, size_type pos = npos) const { + return std::basic_string_view(*this).find_last_not_of(s, pos); + } + + private: + const_pointer data_; // exposition only + size_type size_; // exposition only +}; + +inline namespace literals { +inline namespace cstring_view_literals { +// [cstring.view.literals], suffix for basic_cstring_view literals +constexpr cstring_view operator""_csv(const char* str, size_t len) noexcept { return basic_cstring_view(str, len); } +constexpr u8cstring_view operator""_csv(const char8_t* str, size_t len) noexcept { + return basic_cstring_view(str, len); +} +constexpr u16cstring_view operator""_csv(const char16_t* str, size_t len) noexcept { + return basic_cstring_view(str, len); +} +constexpr u32cstring_view operator""_csv(const char32_t* str, size_t len) noexcept { + return basic_cstring_view(str, len); +} +constexpr wcstring_view operator""_csv(const wchar_t* str, size_t len) noexcept { + return basic_cstring_view(str, len); +} +} // namespace cstring_view_literals +} // namespace literals + +template +constexpr bool operator==(basic_cstring_view x, + std::type_identity_t> y) noexcept { + return std::basic_string_view(x) == std::basic_string_view(y); +} + +template +constexpr auto operator<=>(basic_cstring_view x, + std::type_identity_t> y) noexcept { + return std::basic_string_view(x) <=> std::basic_string_view(y); +} + +template +std::basic_ostream& operator<<(std::basic_ostream& os, + basic_cstring_view str) { + return os << std::basic_string_view(str); } +} // namespace beman + // [format.formatter.spec] -template +template struct std::formatter, charT> { formatter() = default; - constexpr auto parse(basic_format_parse_context& context) { - return sv_formatter.parse(context); - } - template + constexpr auto parse(basic_format_parse_context& context) { return sv_formatter.parse(context); } + template auto format(beman::basic_cstring_view csv, basic_format_context<_Out, charT>& context) const { return sv_formatter.format(csv, context); } -private: + + private: formatter, charT> sv_formatter; }; -template<> struct std::hash { - auto operator()(const beman::cstring_view& sv) const noexcept { - return std::hash{}(sv); - } +template <> +struct std::hash { + auto operator()(const beman::cstring_view& sv) const noexcept { return std::hash{}(sv); } }; -template<> struct std::hash { - auto operator()(const beman::u8cstring_view& sv) const noexcept { - return std::hash{}(sv); - } +template <> +struct std::hash { + auto operator()(const beman::u8cstring_view& sv) const noexcept { return std::hash{}(sv); } }; -template<> struct std::hash { - auto operator()(const beman::u16cstring_view& sv) const noexcept { - return std::hash{}(sv); - } +template <> +struct std::hash { + auto operator()(const beman::u16cstring_view& sv) const noexcept { return std::hash{}(sv); } }; -template<> struct std::hash { - auto operator()(const beman::u32cstring_view& sv) const noexcept { - return std::hash{}(sv); - } +template <> +struct std::hash { + auto operator()(const beman::u32cstring_view& sv) const noexcept { return std::hash{}(sv); } }; -template<> struct std::hash { - auto operator()(const beman::wcstring_view& sv) const noexcept { - return std::hash{}(sv); - } +template <> +struct std::hash { + auto operator()(const beman::wcstring_view& sv) const noexcept { return std::hash{}(sv); } }; #endif diff --git a/papers/P3655R2.md b/papers/P3655R2.md index e3bde3b..e5144c2 100644 --- a/papers/P3655R2.md +++ b/papers/P3655R2.md @@ -1,4 +1,4 @@ -std::zstring_view +# std::zstring_view | Document # | P3655R2 | | Date | 2025-06-15 | @@ -10,47 +10,87 @@ std::zstring_view | | Marco Foco | | | Alexey Shevlyakov | -# Abstract +## Abstract We propose a standard string view type that guarantees null-termination. -# Introduction +## Introduction -C++17 introduced `std::string_view`, a non-owning view of a continuous sequence of characters. It is cheap to use, offers fast operations, and replaced most uses of `const std::string&` or `const char*` as function parameters. The utility and interface of string views as well as the benefits of not having to do unnecessary `strlen` calculations on C-style strings makes string view types highly desirable for working with strings in C++. +C++17 introduced `std::string_view`, a non-owning view of a continuous sequence of characters. It is cheap to use, +offers fast operations, and replaced most uses of `const std::string&` or `const char*` as function parameters. The +utility and interface of string views as well as the benefits of not having to do unnecessary `strlen` calculations +on C-style strings makes string view types highly desirable for working with strings in C++. -Unlike `const std::string&` and many but not all `const char*`, `std::string_view` is not null-terminated. This allows it to have fast and cheap substring operations. However, this also means `std::string_view` is not a suitable replacement for either of the two aforementioned types whenever a null-terminated C-style string is needed. While most C++ code mostly interfaces with C++ code, it is not uncommon to need to use operating system calls, C interfaces, third-party library APIs, or even C++ standard library APIs which require null-terminated strings. Because of a lack of a desirable option for passing non-owned null-terminated strings, `std::string_view` parameters are none the less sometimes used today in cases where null-terminated strings are needed, calling `std::string_view::data` to get a `const char*`. This is, needless to say, very bug-prone. +Unlike `const std::string&` and many but not all `const char*`, `std::string_view` is not null-terminated. This +allows it to have fast and cheap substring operations. However, this also means `std::string_view` is not a suitable +replacement for either of the two aforementioned types whenever a null-terminated C-style string is needed. While +most C++ code mostly interfaces with C++ code, it is not uncommon to need to use operating system calls, C +interfaces, third-party library APIs, or even C++ standard library APIs which require null-terminated strings. +Because of a lack of a desirable option for passing non-owned null-terminated strings, `std::string_view` parameters +are none the less sometimes used today in cases where null-terminated strings are needed, calling +`std::string_view::data` to get a `const char*`. This is, needless to say, very bug-prone. -For this reason, many C++ developers use custom `zstring_view` or `cstring_view` types which are guaranteed to be null-terminated. We propose standardizing this utility. +For this reason, many C++ developers use custom `zstring_view` or `cstring_view` types which are guaranteed to be +null-terminated. We propose standardizing this utility. -# Previous Papers +## Previous Papers -The idea of `zstring_view` was present even in the original paper for `string_view` (Sept 2012 - Feb 2014) [N3921](https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2014/n3921.html#null-termination): +The idea of `zstring_view` was present even in the original paper for `string_view` (Sept 2012 - Feb 2014) +[N3921](https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2014/n3921.html#null-termination): -> Another option would be to define a separate zstring_view class to represent null-terminated strings and let it decay to string_view when necessary. That's plausible but not part of this proposal. +> Another option would be to define a separate zstring_view class to represent null-terminated strings and let it +> decay to string_view when necessary. That's plausible but not part of this proposal. -An attempt was made in Feb 2019 to concretely propose the type (renamed to `cstring_view`) with [P1402](http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2019/p1402r0.pdf), but it failed to gain consensus in LEWGI at [Kona](https://wiki.edg.com/bin/view/Wg21kona2019/P1402). In fact, the discussion there concluded with "CONSENSUS: We will not pursue P1402R0 or this problem space." It is also worth noting that contracts were briefly contemplated in the minutes as a way of addressing the problem, however, contracts can't check for a null character safely as it wouldn't be part of the view. Similarly, other runtime checks for `string_view` null-termination are off the table. +An attempt was made in Feb 2019 to concretely propose the type (renamed to `cstring_view`) with +[P1402](http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2019/p1402r0.pdf), but it failed to gain consensus in +LEWGI at [Kona](https://wiki.edg.com/bin/view/Wg21kona2019/P1402). In fact, the discussion there concluded with +"CONSENSUS: We will not pursue P1402R0 or this problem space." It is also worth noting that contracts were briefly +contemplated in the minutes as a way of addressing the problem, however, contracts can't check for a null character +safely as it wouldn't be part of the view. Similarly, other runtime checks for `string_view` null-termination are +off the table. -However, in 2024 [P3081 Core safety profiles for C++26](https://wg21.link/p3081) was published which contains an aside regarding a null-terminated `zstring_view` in section 8, noting: +However, in 2024 [P3081 Core safety profiles for C++26](https://wg21.link/p3081) was published which contains an +aside regarding a null-terminated `zstring_view` in section 8, noting: -> This is one of the commonly-requested features from the [GSL](https://github.com/microsoft/GSL) library that does not yet have a std:: equivalent. It was specifically requested by several reviewers of this work. +> This is one of the commonly-requested features from the [GSL](https://github.com/microsoft/GSL) library that does +> not yet have a std:: equivalent. It was specifically requested by several reviewers of this work. -During the discussion, it was made clear that `zstring_view` deserves attention on its own and it should not be happenstance introduced as part of an entirely different topic. While P3081 did not end up passing for reasons other than `zstring_view`, it is another example of the utility being seen as desirable. +During the discussion, it was made clear that `zstring_view` deserves attention on its own and it should not be +happenstance introduced as part of an entirely different topic. While P3081 did not end up passing for reasons +other than `zstring_view`, it is another example of the utility being seen as desirable. -We also have [P2996 Reflection for C++26](https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2025/p2996r9.html), which in its specification of functions returning a `string_view` or `u8string_view` tries its best to say it's actually supposed to be a `zstring_view`, except without naming the type for existential reasons: +We also have [P2996 Reflection for C++26](https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2025/p2996r9.html), +which in its specification of functions returning a `string_view` or `u8string_view` tries its best to say it's +actually supposed to be a `zstring_view`, except without naming the type for existential reasons: -> Any function in namespace std::meta that whose return type is string_view or u8string_view returns an object V such that `V.data()[V.size()] == '\0'`. +> Any function in namespace std::meta that whose return type is string_view or u8string_view returns an object V +such that `V.data()[V.size()] == '\0'`. -As such, we do believe that there is both space for such a type, and a desire from multiple angles to have it defined in the standard library. +As such, we do believe that there is both space for such a type, and a desire from multiple angles to have it +defined in the standard library. -# Reasons to have the type +## Reasons to have the type -Many functions right now whether C++ standard library calls, operating system calls, or third-party library calls require null-terminated C-style strings. Absent an efficient type that can represent a non-owning view of a null-terminated string, these functions must take either a `const char*` and incur potential `strlen` overhead, take a `const std::string&` and possibly superfluously copy and allocate, take a `string_view` and make a copy, or haphazardly take a `std::string_view` with a fragile unenforceable contract that it is a view of a null-terminated string. Such a contract would be unenforceable because `pre(sv[sv.size()] == 0)` is potentially undefined behavior. +Many functions right now whether C++ standard library calls, operating system calls, or third-party library calls +require null-terminated C-style strings. Absent an efficient type that can represent a non-owning view of a +null-terminated string, these functions must take either a `const char*` and incur potential `strlen` overhead, take +a `const std::string&` and possibly superfluously copy and allocate, take a `string_view` and make a copy, or +haphazardly take a `std::string_view` with a fragile unenforceable contract that it is a view of a null-terminated +string. Such a contract would be unenforceable because `pre(sv[sv.size()] == 0)` is potentially undefined behavior. -String views tend to start their life coming from a string literal or from a type that owns its internal buffer, often `std::string`. Both of these are places that can always create a `zstring_view` instead, as they both know their data is inherently null terminated. +String views tend to start their life coming from a string literal or from a type that owns its internal buffer, +often `std::string`. Both of these are places that can always create a `zstring_view` instead, as they both know +their data is inherently null terminated. -Strings are very often just passed along with a non-owning string type all the way to where they are used as data. Some of these potential uses, such as calling `std::ofstream::write`, do not rely on the null terminator, but others, like `std::ofstream::ofstream`, do. Using `string_view` as the intermediate type loses the knowledge that a null terminator is already guaranteed to be present, requiring the developer to either make assumptions or make a copy. Another common use might be creating a stringstream from a string [StackOverflow question](https://stackoverflow.com/questions/58524805/is-there-a-way-to-create-a-stringstream-from-a-string-view-without-copying-data). +Strings are very often just passed along with a non-owning string type all the way to where they are used as data. +Some of these potential uses, such as calling `std::ofstream::write`, do not rely on the null terminator, but others, +like `std::ofstream::ofstream`, do. Using `string_view` as the intermediate type loses the knowledge that a null +terminator is already guaranteed to be present, requiring the developer to either make assumptions or make a copy. +Another common use might be creating a stringstream from a string [StackOverflow question](https://stackoverflow.com/questions/58524805/is-there-a-way-to-create-a-stringstream-from-a-string-view-without-copying-data). -We do not have to look far for examples of `std::string_view` being used in bug-prone ways with APIs expecting null-terminators. Searching `/\.data\(\)/ string_view language:c++ -is:fork` on GitHub code search turns up two examples on the first page: +We do not have to look far for examples of `std::string_view` being used in bug-prone ways with APIs expecting +null-terminators. Searching `/\.data\(\)/ string_view language:c++ -is:fork` on GitHub code search turns up two +examples on the first page: From [surge-synthesizer/shortcircuit-xt](https://github.com/surge-synthesizer/shortcircuit-xt/blob/ba6ea3a2e703e4fa0ed069427aa42b668699b624/libs/md5sum/demo.cc#L37) @@ -69,53 +109,89 @@ void log(Level level, std::string_view funcName, std::string_view filename, unsi ... ``` -These two examples could be argued to be bad code or misuses of `std::string_view`, however, the fact that they are written reflects the desire for the ability to use string view types in such cases. +These two examples could be argued to be bad code or misuses of `std::string_view`, however, the fact that they are +written reflects the desire for the ability to use string view types in such cases. -These problems are visible enough that we have targeted patches for specific instances of this problem in the standard - [P2495](https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2022/p2495r0.pdf) attempts to directly patch this specific example. The solution bypasses that the problem is that we're missing an unbroken type-safe chain of knowledge that the value being passed is or is not null-terminated. +These problems are visible enough that we have targeted patches for specific instances of this problem in the +standard - [P2495](https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2022/p2495r0.pdf) attempts to directly patch +this specific example. The solution bypasses that the problem is that we're missing an unbroken type-safe chain of +knowledge that the value being passed is or is not null-terminated. -The type effectively fills out the design space that exists around strings within C++. We started off with `std::string` as an owning string type, ambiguously being specified as having a null terminator or not, clarified in C++11 to definitely *have* a null terminator. C++14 added `std::string_view` to that set, offering a non-null-terminated non-owning string type. The type itself raises the question, should we add a non-null-terminated owning string type, and/or a null-terminated non-owning string type? We're proposing the last of these, leaving only the non-null-terminated owning string type as not present. We believe there is no situation to be written where a non-null-terminated owning string type would have a measurable benefit over the null-terminated owning string type that we have, and as such it is not worth the added complexity. +The type effectively fills out the design space that exists around strings within C++. We started off with +`std::string` as an owning string type, ambiguously being specified as having a null terminator or not, clarified +in C++11 to definitely *have* a null terminator. C++14 added `std::string_view` to that set, offering a +non-null-terminated non-owning string type. The type itself raises the question, should we add a non-null-terminated +owning string type, and/or a null-terminated non-owning string type? We're proposing the last of these, leaving only +the non-null-terminated owning string type as not present. We believe there is no situation to be written where a +non-null-terminated owning string type would have a measurable benefit over the null-terminated owning string type +that we have, and as such it is not worth the added complexity. -Without them we retain the question we had before - `const char*`, `const std::string&` or `string_view`? The first loses lots of type safety and potentially requires redundant `strlen` computation elsewhere in the software, the second requires it to be a `std::string` but retains null-termination knowledge, the last offers the ability to send any string type constructible to a string_view, but loses any knowledge of null termination. For the common case of passing along "some string input" to a function that ends up requiring null termination, the proposed `zstring_view` is the only type that properly captures it. +Without them we retain the question we had before - `const char*`, `const std::string&` or `string_view`? The first +loses lots of type safety and potentially requires redundant `strlen` computation elsewhere in the software, the +second requires it to be a `std::string` but retains null-termination knowledge, the last offers the ability to send +any string type constructible to a `string_view`, but loses any knowledge of null termination. For the common case of +passing along "some string input" to a function that ends up requiring null termination, the proposed `zstring_view` +is the only type that properly captures it. -Since the first draft of this paper the use of `cstring_view` and `zstring_view` on Github has grown from 1.9k to 2.1k, indicating active use, and in many cases people adding new implementations of the type. Many contain the subtle bugs that we highlight in this paper, illustrating why it is a good idea to add the type to the standard. +Since the first draft of this paper the use of `cstring_view` and `zstring_view` on Github has grown from 1.9k to +2.1k, indicating active use, and in many cases people adding new implementations of the type. Many contain the subtle +bugs that we highlight in this paper, illustrating why it is a good idea to add the type to the standard. -# Reasons not to have the type +## Reasons not to have the type -In an ideal world, we could actually fix the operating systems and third-party libraries to accept pointer-and-size strings in all places, removing the need for null termination to exist, and for null termination propagation to be relevant. Sometimes, in particular from environments where this may not be unrealistic in a subset, the argument is voiced that `zstring_view` does not offer any benefits. +In an ideal world, we could actually fix the operating systems and third-party libraries to accept pointer-and-size +strings in all places, removing the need for null termination to exist, and for null termination propagation to be +relevant. Sometimes, in particular from environments where this may not be unrealistic in a subset, the argument is +voiced that `zstring_view` does not offer any benefits. -Adding the type complicates the type system around strings by having more options for string types. This is not as much of an argument as it seems, since for each site a single type is the most appropriate, and the only places that would differ are those where the difference can make a meaningful performance impact - ie, the exact kind of tight loop where the type is useful for being able to make the difference. +Adding the type complicates the type system around strings by having more options for string types. This is not as +much of an argument as it seems, since for each site a single type is the most appropriate, and the only places that +would differ are those where the difference can make a meaningful performance impact - ie, the exact kind of tight +loop where the type is useful for being able to make the difference. -# Subtle details +## Subtle details -## zstring_view and string_view relation +### zstring_view and string_view relation -Initially we expected that `zstring_view` may be implementable in terms of `string_view`. This is not actually true; in particular it is not well-formed to use `string_view`'s `operator=` to assign a non-null-terminated `string_view` to a `zstring_view`. As such, there can not be an inheritance relation between the two. +Initially we expected that `zstring_view` may be implementable in terms of `string_view`. This is not actually true; +in particular it is not well-formed to use `string_view`'s `operator=` to assign a non-null-terminated `string_view` +to a `zstring_view`. As such, there can not be an inheritance relation between the two. -`zstring_view` is transparently convertible to a `string_view`. For the conversion operator, there is no `operator=` that could affect the original `zstring_view`, and as such it is a correct change to do. It in particular makes it so that any use of `zstring_view` in places where string_view is expected or desired will work without additional work. +`zstring_view` is transparently convertible to a `string_view`. For the conversion operator, there is no `operator=` +that could affect the original `zstring_view`, and as such it is a correct change to do. It in particular makes it +so that any use of `zstring_view` in places where string_view is expected or desired will work without additional +work. This brings into reality an overload ambiguity: -``` +```cpp void handle(string_view v); void handle(zstring_view v); handle("Hello World!"); ``` -This is now an ambiguous function call. It is not a new problem; [Example on Godbolt](https://godbolt.org/z/c31e31fKW) shows that the exact same problem already happens with regular `std::string` and `std::string_view`. The reason is somewhat fundamental; all three types model the concept "string" and *are* ambiguous. The user should be clear about which properties of string it's expecting. Adding this new type only adds to the vocabulary the option to say "non-owning null-terminated", giving the user better choices rather than complicating it. +This is now an ambiguous function call. It is not a new problem; [Example on Godbolt](https://godbolt.org/z/c31e31fKW) +shows that the exact same problem already happens with regular `std::string` and `std::string_view`. The reason is +somewhat fundamental; all three types model the concept "string" and *are* ambiguous. The user should be clear about +which properties of string it's expecting. Adding this new type only adds to the vocabulary the option to say +"non-owning null-terminated", giving the user better choices rather than complicating it. -### Deprioritizing one overload +#### Deprioritizing one overload -When one overload is preferred, but others should exist, the less preferred ones can be marked with `requires true`. This lifts one overload up out of the overload set for ambiguous matches, while leaving the rest to exist. +When one overload is preferred, but others should exist, the less preferred ones can be marked with `requires true`. +This lifts one overload up out of the overload set for ambiguous matches, while leaving the rest to exist. -### Tag type forced conversions for exact matches +#### Tag type forced conversions for exact matches -[https://godbolt.org/z/8qssnq8rf] and [https://godbolt.org/z/xG7955hf9] illustrate the idea of using a wrapping template to make one overload stand out. +[https://godbolt.org/z/8qssnq8rf] and [https://godbolt.org/z/xG7955hf9] illustrate the idea of using a wrapping +template to make one overload stand out. -### Explicitly adding a function to disambiguate manually +#### Explicitly adding a function to disambiguate manually -[https://godbolt.org/z/9zoGEh1cv] demonstrates adding an overload that matches anything that isn't an exact match, which indirects to a function that hand-picks an overload. +[https://godbolt.org/z/9zoGEh1cv] demonstrates adding an overload that matches anything that isn't an exact match, +which indirects to a function that hand-picks an overload. -## Construction +### Construction `std::basic_string_view` offers a handful of constructors: @@ -123,27 +199,32 @@ When one overload is preferred, but others should exist, the less preferred ones constexpr basic_string_view(const charT* str); // (1) ``` -The basic constructor from an unadorned `charT*` risks taking a non-null-terminated string and searching for the null terminator. For `zstring_view` this constructor is as safe as it is for `string_view`, since we need to have a null terminator anyway. +The basic constructor from an unadorned `charT*` risks taking a non-null-terminated string and searching for the null +terminator. For `zstring_view` this constructor is as safe as it is for `string_view`, since we need to have a null +terminator anyway. ```cpp constexpr basic_string_view(const charT* str, size_type len); // (2) ``` -This constructor conceptually offers a O(1) construction time. For zstring_view we add a contract that asserts that `str[len] == '\0'`. +This constructor conceptually offers a O(1) construction time. For zstring_view we add a contract that asserts that +`str[len] == '\0'`. ```cpp template constexpr basic_string_view(It begin, End end); // (3) ``` -This constructor is problematic to support, as we have to check the end iterator to ensure the string is null-terminated, but the end iterator cannot be assumed to be dereferencable. +This constructor is problematic to support, as we have to check the end iterator to ensure the string is +null-terminated, but the end iterator cannot be assumed to be dereferenceable. ```cpp template constexpr explicit basic_string_view(R&& r); // (4) ``` -This constructor is difficult to support as well; we need to check that after the end of the range there is a null terminator, which is by definition accessing out of the range. +This constructor is difficult to support as well; we need to check that after the end of the range there is a null +terminator, which is by definition accessing out of the range. ```cpp basic_string_view( std::nullptr_t ) = delete; // (5) @@ -151,54 +232,83 @@ basic_string_view( std::nullptr_t ) = delete; // (5) We should not support constructing a string from nullptr. -## Member functions on `string_view` that return a `string_view` +### Member functions on `string_view` that return a `string_view` -In some cases the functions can be replicated on `zstring_view` with the return type being a `zstring_view`, while in some cases the return type loses the ability to guarantee null termination, and should still return a `string_view`. The types are somewhat entangled. +In some cases the functions can be replicated on `zstring_view` with the return type being a `zstring_view`, while in +some cases the return type loses the ability to guarantee null termination, and should still return a `string_view`. +The types are somewhat entangled. -### `substr` +#### `substr` -The `substr` function is changed from a single function with a default argument, to two functions, one with one and one with two arguments. The one-argument `substr` always retains the end (identical to `string_view`'s `substr`) and returns a `zstring_view`; the two-argument `substr` at least in a conceptual sense chops off at least the null terminator from the end, and should always return a `string_view`. +The `substr` function is changed from a single function with a default argument, to two functions, one with one and +one with two arguments. The one-argument `substr` always retains the end (identical to `string_view`'s `substr`) and +returns a `zstring_view`; the two-argument `substr` at least in a conceptual sense chops off at least the null +terminator from the end, and should always return a `string_view`. -### `remove_prefix` +#### `remove_prefix` `remove_prefix` drops characters from the front of the string it's called on. Behavior fits. -### `remove_suffix` +#### `remove_suffix` -`remove_suffix` changes the string in-situ, and as such is unimplementable on zstring_view. It should be a function marked as =delete with a reason pointing users to use the two-argument `substr` function instead, which returns a `string_view`. +`remove_suffix` changes the string in-situ, and as such is unimplementable on `zstring_view`. It should be a function +marked as =delete with a reason pointing users to use the two-argument `substr` function instead, which returns a +`string_view`. -# Bikeshedding +## Bikeshedding -Null-terminated string view types are typically named `zstring_view` (N3921, P3081, GSL) or `cstring_view` (P1402). `cstring_view` follows the `std::string::c_str()` nomenclature while `zstring_view` has some establishment and recognizability. +Null-terminated string view types are typically named `zstring_view` (N3921, P3081, GSL) or `cstring_view` (P1402). +`cstring_view` follows the `std::string::c_str()` nomenclature while `zstring_view` has some establishment and +recognizability. -Github code search shows similar popularity between [`cstring_view`](https://github.com/search?q=%2F%5Cbcstring_view%5Cb%2F%20language%3Ac%2B%2B%20-is%3Afork&type=code) (1.2k results as of the time of writing) and [`zstring_view`](https://github.com/search?q=%2F%5Cbzstring_view%5Cb%2F+language%3Ac%2B%2B+-is%3Afork&type=code) (680 results as of the time of writing). In refreshing this paper, it appears that cstring_view has gained ~200 added results, while zstring_view only added 8, hinting at a popular preference for the former name. +Github code search shows similar popularity between +[`cstring_view`](https://github.com/search?q=%2F%5Cbcstring_view%5Cb%2F%20language%3Ac%2B%2B%20-is%3Afork&type=code) +(1.2k results as of the time of writing) and +[`zstring_view`](https://github.com/search?q=%2F%5Cbzstring_view%5Cb%2F+language%3Ac%2B%2B+-is%3Afork&type=code) +(680 results as of the time of writing). In refreshing this paper, it appears that `cstring_view` has gained ~200 added +results, while `zstring_view` only added 8, hinting at a popular preference for the former name. -# Reference Implementation +## Reference Implementation A reference implementation is at [Jeremy's Github](https://github.com/jeremy-rifkin/zstring_view). -# NVIDIA Experience (moved from p3710) +## NVIDIA Experience (moved from p3710) -NVIDIA implemented `zstring_view` independently, with almost identical features. This is described in [P3710 zstring_view: a string_view with guaranteed null termination](https://wg21.link/p3710), now merged into this paper. We also implemented some additional features described in [P3566 You shall not pass char*](https://wg21.link/p3566). +NVIDIA implemented `zstring_view` independently, with almost identical features. This is described in +[P3710 zstring_view: a string_view with guaranteed null termination](https://wg21.link/p3710), now merged into this +paper. We also implemented some additional features described in +[P3566 You shall not pass char\*](https://wg21.link/p3566). -Ideally we wanted our input parameters to all be `string_view` and all the output parameter to be `zstring_view` (which gives more flexibility). +Ideally we wanted our input parameters to all be `string_view` and all the output parameter to be `zstring_view` +(which gives more flexibility). In practice, there are exceptions on both sides: -- When a null-terminated parameter is needed for internal reasons outside of our control, i.e. when interacting with system calls or third-party libraries, it's preferable to have a `zstring_view` parameter. -- When returning a part of an internal string, e.g. when extracting the file name from a full path, it's more convenient to just return a `string_view`. +- When a null-terminated parameter is needed for internal reasons outside of our control, i.e. when interacting +with system calls or third-party libraries, it's preferable to have a `zstring_view` parameter. +- When returning a part of an internal string, e.g. when extracting the file name from a full path, it's more +convenient to just return a `string_view`. -Nonetheless, having a large codebase, we knew we couldn't just rewrite the entire codebase to apply this change, so we used `zstring_view` as a tool for steering our codebase in the ideal direction described above. +Nonetheless, having a large codebase, we knew we couldn't just rewrite the entire codebase to apply this change, so +we used `zstring_view` as a tool for steering our codebase in the ideal direction described above. -Initially, we used as `zstring_view` as a drop-in replacement for `const char*` parameters and return values of our APIs, without worrying too much wether or not the function expects a null terminator. As a sidenote, for this migration we asked our developers to be intentional in using `.data()` or `.c_str()`, and use `.data()` when a null-terminator is not expected, and only use `.c_str()` when the string is required to be terminated. If respected, this rule allows us to just try changing a parameter from `zstring_view` to `string_view` and verify wether or not a function is relying on some parameter to be null-terminated. This operation can be done incrementally, one function at a time. +Initially, we used as `zstring_view` as a drop-in replacement for `const char*` parameters and return values of our +APIs, without worrying too much whether or not the function expects a null terminator. As a sidenote, for this +migration we asked our developers to be intentional in using `.data()` or `.c_str()`, and use `.data()` when a +null-terminator is not expected, and only use `.c_str()` when the string is required to be terminated. If respected, +this rule allows us to just try changing a parameter from `zstring_view` to `string_view` and verify whether or not +a function is relying on some parameter to be null-terminated. This operation can be done incrementally, one function +at a time. -Once this was done, we started from the "deepest" functions to analyze their usage, and replace their parameters `zstring_view` to `string_view`, or rewrite them to allow for `string_view` parameters. We proceeded upwards in the call chain, and did the same. Again, this can be done in multiple passes. +Once this was done, we started from the "deepest" functions to analyze their usage, and replace their parameters +`zstring_view` to `string_view`, or rewrite them to allow for `string_view` parameters. We proceeded upwards in the +call chain, and did the same. Again, this can be done in multiple passes. What we observed is that, in some case, this might not only make the code safer, but also enable new optimizations. -Here follows an example of a parameter upgrade how we implemented these changes, step-by-step, with an example of how this can lead to more performant and safer code at the end of the process. +Here follows an example of a parameter upgrade how we implemented these changes, step-by-step, with an example of how +this can lead to more performant and safer code at the end of the process. - -``` +```cpp void f(const char* x) { external_library::g1(x); } @@ -230,7 +340,7 @@ In this example, `external_library::g1` is a placeholder for some generic comput Initially, we update the API we will change f to: -``` +```cpp void f(zstring_view x) { external_library::g1(x.c_str()); } @@ -238,9 +348,10 @@ void f(zstring_view x) { All the functions now have well-defined memory ranges (no unbounded string). -We can now then look for alternatives, and find out there's a different API for the `external_library::g1` we can use, e.g. `external_library::g2(char*, size_t)`, that doesn't require a null-terminated sequence. +We can now then look for alternatives, and find out there's a different API for the `external_library::g1` we can use, +e.g. `external_library::g2(char*, size_t)`, that doesn't require a null-terminated sequence. -``` +```cpp void f(string_view x) { external_library::g2(x.data(), x.size()); } @@ -248,7 +359,7 @@ void f(string_view x) { As next step, we can check all the usages, and see if we can get rid of some extra code, for example: -``` +```cpp void f4() { string_view a4 = "abcde"; string_view subtext = a4.substr(2, 2); // no null terminator at the end of `subtext` @@ -259,31 +370,42 @@ void f4() { This results in a better-optimized code at the end (less need for temporary `string`s). -NOTE: The sequence of changes in our codebase was different, because our codebase includes the changes proposed in [P3566 You shall not pass char*](https://wg21.link/p3566), so some of the intermediate steps relying on implicit `char*` -> `string_view` and `char*` -> `zstring_view` conversions require additional changes, and intermediate steps marking conversions with the proposed `unsafe_length` tag. +NOTE: The sequence of changes in our codebase was different, because our codebase includes the changes proposed in +[P3566 You shall not pass char\*](https://wg21.link/p3566), so some of the intermediate steps relying on implicit +`char*` -> `string_view` and `char*` -> `zstring_view` conversions require additional changes, and intermediate steps +marking conversions with the proposed `unsafe_length` tag. -# Standard Library Changes +## Standard Library Changes -There are a handful of interfaces in the standard library which take `const string&` and really want a `zstring_view` or possibly a `string_view`. These include: +There are a handful of interfaces in the standard library which take `const string&` and really want a `zstring_view` +or possibly a `string_view`. These include: -- Constructors for stdexcept types, `system_error`, `format_error`, `ios_base::failure`, and `std::filesystem::filesystem_error` +- Constructors for stdexcept types, `system_error`, `format_error`, `ios_base::failure`, and + `std::filesystem::filesystem_error` - The `stoi` family of functions - `random_device::random_device` - Lots of locale interfaces -- `basic_filebuf::open`, `basic_ifstream::basic_ifstream`, `basic_ifstream::open`, `basic_ofstream::basic_ofstream`, `basic_ofstream::open`, `basic_fstream::basic_fstream`, and `basic_fstream::open` +- `basic_filebuf::open`, `basic_ifstream::basic_ifstream`, `basic_ifstream::open`, `basic_ofstream::basic_ofstream`, + `basic_ofstream::open`, `basic_fstream::basic_fstream`, and `basic_fstream::open` -There are further interfaces which take `const char*` and could have overloads taking a `zstring_view` or `string_view`. +There are further interfaces which take `const char*` and could have overloads taking a `zstring_view` or +`string_view`. We do not propose any such changes in this paper but would like to investigate this in a follow-up paper. -# Polls +## Polls -The first question to LEWG is, do we want to have the zstring_view type in the standard library? So far it looks like voting against the type leads to people implementing their own, including C++ committee members "implementing" their own zstring_view in wording. We also have a few direction questions with regards to either "fixing" string interface, or keeping the interfaces synchronized. +The first question to LEWG is, do we want to have the zstring_view type in the standard library? So far it looks like +voting against the type leads to people implementing their own, including C++ committee members "implementing" their +own zstring_view in wording. We also have a few direction questions with regards to either "fixing" string interface, +or keeping the interfaces synchronized. -## We encourage further work on the zstring_view proposal for the C++29 timeframe. +### We encourage further work on the zstring_view proposal for the C++29 timeframe -If so, we will return at the next meeting (Kona 2025) with the wording worked out and checked with a Core representative, with the intent of merging zstring_view early in the C++29 cycle. If not, we will retire this paper. +If so, we will return at the next meeting (Kona 2025) with the wording worked out and checked with a Core +representative, with the intent of merging zstring_view early in the C++29 cycle. If not, we will retire this paper. -## We prefer the name `cstring_view` +### We prefer the name `cstring_view` The bikeshedding mentioned above. At this point we can still choose which name to use. The authors have no preference. @@ -291,38 +413,55 @@ The bikeshedding mentioned above. At this point we can still choose which name t - `cstring_view` matches existing `std::string` interface nomenclature (`c_str()`). - `zstring_view` is the most recently proposed name in [P3081](https://wg21.link/p3081) -## We prefer zstring_view to remove char_traits from its interface. +### We prefer zstring_view to remove char_traits from its interface -SG16 has long held that char_traits offer no value and are sometimes abused. If possible they would propose to remove it altogether [P3681]. This paper however is trying to add `zstring_view` to the standard without changing the direction for char_traits, and as such it is not proposing to remove char_traits. +SG16 has long held that char_traits offer no value and are sometimes abused. If possible they would propose to remove +it altogether [P3681]. This paper however is trying to add `zstring_view` to the standard without changing the +direction for char_traits, and as such it is not proposing to remove char_traits. -Removing it anyway would necessarily lose information on implicit conversion from std::string through std::zstring_view to std::string_view, as the char_traits that were present on the string are no longer possible to transfer to the string_view. There was a discussion in SG16 with regards to char_traits needing to be removed, needing to be kept, or whether no objection existed to retaining it for compatibility, and the following poll was taken: +Removing it anyway would necessarily lose information on implicit conversion from std::string through +std::zstring_view to std::string_view, as the char_traits that were present on the string are no longer possible to +transfer to the string_view. There was a discussion in SG16 with regards to char_traits needing to be removed, needing +to be kept, or whether no objection existed to retaining it for compatibility, and the following poll was taken: > Poll 1: P3655R0: No objection to use of std::char_traits for consistency and compatibility with std::string_view. | SF | F | N | A | SA | | 7 | 1 | 0 | 0 | 0 | -> Strong consensus. +> Strong consensus. > Attendees: 8 (no abstentions) -The paper authors hold no strong opinion on whether char_traits should be kept or removed. Pragmatically we propose to keep char_traits to make the conversion through zstring_view lossless, and to treat removal of char_traits support as a uniform thing to be applied to all string types. [P3681] disagrees, however, and we hope to see it up for discussion to reduce user-visible churn. +The paper authors hold no strong opinion on whether char_traits should be kept or removed. Pragmatically we propose to +keep char_traits to make the conversion through zstring_view lossless, and to treat removal of char_traits support as +a uniform thing to be applied to all string types. [P3681] disagrees, however, and we hope to see it up for discussion +to reduce user-visible churn. -## We prefer the zstring_view interface to add an array constructor +### We prefer the zstring_view interface to add an array constructor ```cpp template consteval basic_zstring_view(const charT (&str)[N]); ``` -The advantage of adding this function is that it is a clean constructor to convert from a string literal with compile-time known size. The downsides are that it will create an interface difference between `string_view` and `zstring_view`. +The advantage of adding this function is that it is a clean constructor to convert from a string literal with +compile-time known size. The downsides are that it will create an interface difference between `string_view` and +`zstring_view`. -## We prefer to disallow contained NULs +### We prefer to disallow contained NULs -String and string_view both support having a string with embedded NUL characters. On one hand it makes complete sense to do the same for zstring_view. On the other hand, the value of zstring_view lies in knowing it has a null terminator at `size()`, and while allowing embedded NULs would not strictly violate that - there still is a NUL terminator there - it would logically break it, as there would be a NUL before that that effectively terminates the string. +String and string_view both support having a string with embedded NUL characters. On one hand it makes complete sense +to do the same for zstring_view. On the other hand, the value of zstring_view lies in knowing it has a null terminator +at `size()`, and while allowing embedded NULs would not strictly violate that - there still is a NUL terminator +there - it would logically break it, as there would be a NUL before that that effectively terminates the string. -This gives rise to the possibility for bugs where strings are being used by their `strlen(x.c_str())` length, and elsewhere by their `x.size()`. These things are likely enough to have [CWE-135: Incorrect Calculation of Multi-Byte String Length](https://cwe.mitre.org/data/definitions/135.html) and [CWE-179: Incorrect Behavior Order: Early Validation](https://cwe.mitre.org/data/definitions/179.html), quote: +This gives rise to the possibility for bugs where strings are being used by their `strlen(x.c_str())` length, and +elsewhere by their `x.size()`. These things are likely enough to have +[CWE-135: Incorrect Calculation of Multi-Byte String Length](https://cwe.mitre.org/data/definitions/135.html) and +[CWE-179: Incorrect Behavior Order: Early Validation](https://cwe.mitre.org/data/definitions/179.html), quote: -> An attacker could include dangerous input that bypasses validation protection mechanisms which can be used to launch various attacks including injection attacks, execute arbitrary code or cause other unintended behavior. +> An attacker could include dangerous input that bypasses validation protection mechanisms which can be used to launch +> various attacks including injection attacks, execute arbitrary code or cause other unintended behavior. Consider the following call: @@ -332,26 +471,30 @@ validate_command(user_command.c_str()); // older validation function that does i evaluate_command(user_command); // newer function that uses user_command.size() ``` -We can prevent these bugs at this stage by disallowing contained NULs in the NUL-terminated string view type. But it is a breaking change to existing expectation. +We can prevent these bugs at this stage by disallowing contained NULs in the NUL-terminated string view type. But it +is a breaking change to existing expectation. -A strong argument against is that constructing a `zstring_view` from this string literal will behave differently than constructing a `string_view` would, and the invariant that `zstring_view` is null-terminated is not broken with an embedded NUL. +A strong argument against is that constructing a `zstring_view` from this string literal will behave differently than +constructing a `string_view` would, and the invariant that `zstring_view` is null-terminated is not broken with an +embedded NUL. -# Wording +## Wording -This wording section is currently mostly high-level and synopsis changes. We defer full wording until feedback from LEWG. +This wording section is currently mostly high-level and synopsis changes. We defer full wording until feedback from +LEWG. Borrowing extensively from existing `string_view` wording. Add to `[format.formatter.spec]` 2.2: -``` +```cpp template struct formatter, charT>; ``` Add to `[format.formatter.spec]` 4.1: -``` +```cpp template struct formatter, wchar_t>; ``` @@ -366,17 +509,21 @@ Add to [basic.string.general]: Wording on additions deferred until a later revision. -## Zstring View Classes [zstring.view] +### Zstring View Classes [zstring.view] -### General [zstring.view.general] +#### General [zstring.view.general] -The class template basic_zstring_view describes an object that can refer to a constant contiguous sequence of char-like ([strings.general]) objects with the first element of the sequence at position zero. In the rest of [string.view], the type of the char-like objects held in a `basic_string_view` object is designated by `charT`. +The class template basic_zstring_view describes an object that can refer to a constant contiguous sequence of +char-like ([strings.general]) objects with the first element of the sequence at position zero. In the rest of +[string.view], the type of the char-like objects held in a `basic_string_view` object is designated by `charT`. -In all cases, `[data(), data() + size()]` is a valid range and `data() + size()` points at an object with value `charT()` (a "null terminator"). +In all cases, `[data(), data() + size()]` is a valid range and `data() + size()` points at an object with value +`charT()` (a "null terminator"). -\[Note 1: `basic_zstring_view` is primarily useful when working with C APIs which expect null terminated strings. - end note] +\[Note 1: `basic_zstring_view` is primarily useful when working with C APIs which expect null +terminated strings. - end note] -#### Header `` synopsis [zstring.view.synop] +##### Header `` synopsis [zstring.view.synop] ```cpp // mostly freestanding @@ -434,9 +581,9 @@ namespace std { } ``` -### Class template basic_zstring_view [zstring.view.template] +#### Class template basic_zstring_view [zstring.view.template] -#### General [zstring.view.template.general] +##### General [zstring.view.template.general] ```cpp namespace std { @@ -465,7 +612,7 @@ namespace std { constexpr basic_zstring_view(const charT* str) noexcept pre(str != nullptr); constexpr basic_zstring_view(const charT* str, size_type len) noexcept - pre (str != nullptr) + pre (str != nullptr) pre (str[len] == '\0'); template constexpr basic_zstring_view(const charT (&str)[N]) noexcept @@ -501,7 +648,8 @@ namespace std { // [zstring.view.modifiers], modifiers constexpr void remove_prefix(size_type n); - constexpr void remove_suffix(size_type n) = delete("cannot remove_suffix in-place on zstring_view while retaining null terminator. Use substr instead."); + constexpr void remove_suffix(size_type n) = delete("cannot remove_suffix in-place on zstring_view while retaining + null terminator. Use substr instead."); constexpr void swap(basic_zstring_view& s) noexcept; // [zstring.view.ops], zstring operations diff --git a/src/beman/cstring_view/cstring_view.cpp b/src/beman/cstring_view/cstring_view.cpp index e533aee..0d608d5 100644 --- a/src/beman/cstring_view/cstring_view.cpp +++ b/src/beman/cstring_view/cstring_view.cpp @@ -1,2 +1 @@ #include - diff --git a/tests/beman/cstring_view/cstring_view.test.cpp b/tests/beman/cstring_view/cstring_view.test.cpp index d117efa..9d7e8b7 100644 --- a/tests/beman/cstring_view/cstring_view.test.cpp +++ b/tests/beman/cstring_view/cstring_view.test.cpp @@ -7,7 +7,7 @@ using namespace beman::literals; using namespace std::literals; TEST(StringView, ConstructionDestruction) { - std::string s = "hello"; + std::string s = "hello"; beman::cstring_view h1 = "hello"; beman::cstring_view h2 = h1;