MT Core (C++)
Core library for replacing C++ standard in project usage
Loading...
Searching...
No Matches
byte_endian.hpp
Go to the documentation of this file.
1/*
2
3Copyright 2025 Matthew Tolman
4
5Licensed under the Apache License, Version 2.0 (the "License");
6you may not use this file except in compliance with the License.
7You may obtain a copy of the License at
8
9 http://www.apache.org/licenses/LICENSE-2.0
10
11Unless required by applicable law or agreed to in writing, software
12distributed under the License is distributed on an "AS IS" BASIS,
13WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14See the License for the specific language governing permissions and
15limitations under the License.
16
17*/
18
19#ifndef MTSTD_ENCODINGS_HPP
20#define MTSTD_ENCODINGS_HPP
21
22#include "mtcore/byte_order.hpp"
24#include "mtcore/io/writer.hpp"
25#include <array>
26
31
34namespace mtcore {
/**
 * Byte order mark value (0xFEFF) that the helpers below compare against.
 */
constexpr char32_t bom = 0xFEFF;

/**
 * Tests whether a code unit equals the byte order mark.
 * @param ch code unit to inspect
 * @return true when ch is 0xFEFF
 */
constexpr auto is_bom(const char32_t ch) {
  return bom == ch;
}

/**
 * Tests whether a 16-bit code unit is the byte order mark with its two bytes swapped.
 * @param ch code unit to inspect
 * @return true when ch is 0xFFFE
 */
constexpr auto is_bom_rev_16_bit(const char16_t ch) {
  return 0xFFFE == ch;
}

/**
 * Tests whether a 32-bit code unit is the byte order mark with its four bytes reversed.
 * @param ch code unit to inspect
 * @return true when ch is 0xFFFE0000
 */
constexpr auto is_bom_rev_32_bit(const char32_t ch) {
  return 0xFFFE0000 == ch;
}
56
57 template<std::endian target, typename T = char32_t>
58 constexpr T endian_bom = target == std::endian::native ? bom : flip_endian<T>(bom);
59
66 inline Optional<std::endian> u16_detect_endian(const char16_t ch) {
67 if (is_bom(ch)) {
68 return endian::machine;
69 }
70 else if (is_bom_rev_16_bit(ch)) {
71 return endian::opposite;
72 }
73 else {
74 return nullopt;
75 }
76 }
77
84 inline Optional<std::endian> u32_detect_endian(const char32_t ch) {
85 if (is_bom(ch)) {
86 return endian::machine;
87 }
88 else if (is_bom_rev_32_bit(ch)) {
89 return endian::opposite;
90 }
91 else {
92 return nullopt;
93 }
94 }
95
96 namespace impl {
103 template<typename T, size_t MaxLen>
104 struct ShortSeq {
105 using Elem = T;
106 int len;
107 std::array<Elem, MaxLen> elems;
108
109 [[nodiscard]] size_t size() const {
110 ensure(len < MaxLen);
111 return len;
112 }
113
114 T &operator[](size_t i) {
115 ensure(i <= MaxLen, "out of bounds");
116 ensure(static_cast<int>(i) < len, "out of bounds");
117 return elems[i];
118 }
119
120 const T &operator[](size_t i) const {
121 ensure(i <= MaxLen, "out of bounds");
122 ensure(static_cast<int>(i) < len, "out of bounds");
123 return elems[i];
124 }
125
126 [[nodiscard]] T &at(size_t i) noexcept { return (*this)[i]; }
127 [[nodiscard]] constexpr const T &at(size_t i) const noexcept { return (*this)[i]; }
128
129 [[nodiscard]] decltype(auto) ptr_iter() noexcept { return iter::ptr(*this); }
130 [[nodiscard]] decltype(auto) ptr_iter() const noexcept { return iter::const_ptr(*this); }
131 [[nodiscard]] decltype(auto) iter() const noexcept { return iter::val(*this); }
132 };
133 } // namespace impl
134
// Short sequence of up to 4 `char` elements.
135 using CharSeq = impl::ShortSeq<char, 4>;
// Short sequence of up to 4 `char8_t` elements.
136 using U8Seq = impl::ShortSeq<char8_t, 4>;
// Short sequence of up to 2 `char16_t` elements.
137 using U16Seq = impl::ShortSeq<char16_t, 2>;
138
/**
 * Tests whether a UTF-16 code unit lies in the surrogate range 0xD800..0xDFFF.
 * The top five bits of every value in that range are 11011.
 */
constexpr bool is_surrogate(char16_t ch) {
  return (ch & 0xF800) == 0xD800;
}

/**
 * Tests whether a UTF-16 code unit is a high surrogate, 0xD800..0xDBFF.
 * The top six bits of every value in that range are 110110.
 */
constexpr bool is_high_surrogate(char16_t ch) {
  return (ch & 0xFC00) == 0xD800;
}

/**
 * Tests whether a UTF-16 code unit is a low surrogate, 0xDC00..0xDFFF.
 * The top six bits of every value in that range are 110111.
 */
constexpr bool is_low_surrogate(char16_t ch) {
  return (ch & 0xFC00) == 0xDC00;
}
152
157 template<std::endian E = std::endian::native>
158 constexpr auto oppositeEndian = E == std::endian::big ? std::endian::little : std::endian::big;
159
172 template<class T, std::endian Endian = std::endian::native>
173 struct EndianSlice {
174 static_assert(std::is_same_v<T, const char16_t> || std::is_same_v<T, char16_t> ||
175 std::is_same_v<T, const char32_t> || std::is_same_v<T, char32_t>);
176 using Elem = T;
177
178 T *head = nullptr;
179 size_t len = 0;
180
181 [[nodiscard]] decltype(auto) ptr_iter() noexcept { return iter::ptr(*this); }
182 [[nodiscard]] decltype(auto) ptr_iter() const noexcept { return iter::const_ptr(*this); }
183 [[nodiscard]] decltype(auto) iter() const noexcept { return iter::val(*this); }
184
186 [[nodiscard]] constexpr Slice<std::add_const_t<T>> to_const() const noexcept {
187 return {static_cast<std::add_pointer_t<std::add_const_t<T>>>(head), len};
188 }
189
198 constexpr void init(T *head, size_t len) {
199 this->head = head;
200 this->len = len;
201 }
202
210 size_t i;
211
215 constexpr operator T() const { return as_endian(std::endian::native); }
216
217 [[nodiscard]] constexpr auto native() const { return as_endian(std::endian::native); }
218
223 [[nodiscard]] constexpr T as_endian(std::endian viewAs) const {
224 ensure(i < slice.size(), "SLICE ACCESS OUT OF BOUNDS");
225 ensure(slice.head, "NULL POINTER DEREFERENCE");
226 auto v = slice.head[i];
227 if (viewAs != Endian) {
228 return flip_endian(v);
229 }
230 return v;
231 }
232 };
233
241 size_t i;
242
246 constexpr operator T() const { return as_endian(std::endian::native); }
247
251 constexpr EndianWrapper &operator=(T val) { return with_endian(val, std::endian::native); }
252
253 [[nodiscard]] constexpr T native() const { return as_endian(std::endian::native); }
254
259 [[nodiscard]] constexpr T as_endian(std::endian viewAs) const {
260 ensure(i < slice.size(), "SLICE ACCESS OUT OF BOUNDS");
261 ensure(slice.head, "NULL POINTER DEREFERENCE");
262 auto v = slice.head[i];
263 if (viewAs != Endian) {
264 return flip_endian(v);
265 }
266 return v;
267 }
268
274 constexpr EndianWrapper &with_endian(T newVal, std::endian setAs) {
275 ensure(i < slice.size(), "SLICE ACCESS OUT OF BOUNDS");
276 ensure(slice.head, "NULL POINTER DEREFERENCE");
277 if (setAs != Endian) {
278 newVal = flip_endian(newVal);
279 }
280 slice.head[i] = newVal;
281 return *this;
282 }
283 };
284
290 [[nodiscard]] constexpr ConstEndianWrapper operator[](size_t i) const noexcept {
291 ensure(i < len, "SLICE ACCESS OUT OF BOUNDS");
292 ensure(head, "NULL POINTER DEREFERENCE");
293 return ConstEndianWrapper{*this, i};
294 }
295
301 EndianWrapper operator[](size_t i) noexcept {
302 ensure(i < len, "SLICE ACCESS OUT OF BOUNDS");
303 ensure(head, "NULL POINTER DEREFERENCE");
304 return EndianWrapper{*this, i};
305 }
306
312 EndianWrapper at(size_t i) noexcept { return (*this)[i]; }
313
319 [[nodiscard]] constexpr ConstEndianWrapper at(size_t i) const noexcept { return (*this)[i]; }
320
324 [[nodiscard]] constexpr size_t size() const noexcept { return head != nullptr ? len : 0; }
325
329 [[nodiscard]] constexpr bool empty() const noexcept { return head == nullptr || len == 0; }
330
335 [[nodiscard]] constexpr EndianSlice sub(size_t start) const noexcept {
336 EndianSlice res;
337 if (start >= len) {
338 res.init(nullptr, 0);
339 return res;
340 }
341
342 res.init(head + start, len - start);
343 ensure(res.len + start == len, "BAD LENGTH MATH");
344 ensure(head + len == res.head + res.len, "BAD END POINTER");
345 return res;
346 }
347
353 [[nodiscard]] constexpr EndianSlice sub(size_t start, size_t len) const noexcept {
354 auto res = sub(start);
355 res.len = res.len > len ? len : res.len;
356 ensure(res.len <= len, "BAD LENGTH");
357 return res;
358 }
359
365 template<std::endian E>
366 std::strong_ordering operator<=>(const EndianSlice<std::remove_const_t<T>, E> &other) const noexcept {
367 return *this <=> other.to_const();
368 }
369
375 template<std::endian E>
376 std::strong_ordering operator<=>(const EndianSlice<std::add_const_t<T>, E> &other) const noexcept {
377 if (!head) {
378 if (other.head) {
379 return std::strong_ordering::less;
380 }
381 return std::strong_ordering::equal;
382 }
383
384 for (size_t i = 0; i < len && i < other.len; ++i) {
385 auto tv = (*this)[i].native();
386 auto ov = other[i].native();
387 if (tv > ov) {
388 return std::strong_ordering::greater;
389 }
390 else if (tv < ov) {
391 return std::strong_ordering::less;
392 }
393 }
394
395 if (len == other.len) {
396 return std::strong_ordering::equal;
397 }
398 else if (len < other.len) {
399 return std::strong_ordering::less;
400 }
401 else {
402 return std::strong_ordering::greater;
403 }
404 }
405
406 template<std::endian E>
407 bool operator==(const EndianSlice<std::remove_const_t<T>, E> &o) const noexcept {
408 return (*this <=> o) == std::strong_ordering::equal;
409 }
410
411 template<std::endian E>
412 bool operator!=(const EndianSlice<std::remove_const_t<T>, E> &o) const noexcept {
413 return (*this <=> o) != std::strong_ordering::equal;
414 }
415
416 template<std::endian E>
417 bool operator<(const EndianSlice<std::remove_const_t<T>, E> &o) const noexcept {
418 return (*this <=> o) == std::strong_ordering::less;
419 }
420
421 template<std::endian E>
422 bool operator>(const EndianSlice<std::remove_const_t<T>, E> &o) const noexcept {
423 return (*this <=> o) == std::strong_ordering::greater;
424 }
425
426 template<std::endian E>
427 bool operator<=(const EndianSlice<std::remove_const_t<T>, E> &o) const noexcept {
428 const auto cmp = *this <=> o;
429 return (cmp == std::strong_ordering::less || cmp == std::strong_ordering::equal);
430 }
431
432 template<std::endian E>
433 bool operator>=(const EndianSlice<std::remove_const_t<T>, E> &o) const noexcept {
434 const auto cmp = *this <=> o;
435 return (cmp == std::strong_ordering::greater || cmp == std::strong_ordering::equal);
436 }
437
438 template<std::endian E>
439 bool operator==(const EndianSlice<std::add_const_t<T>, E> &o) const noexcept {
440 return (*this <=> o) == std::strong_ordering::equal;
441 }
442
443 template<std::endian E>
444 bool operator!=(const EndianSlice<std::add_const_t<T>, E> &o) const noexcept {
445 return (*this <=> o) != std::strong_ordering::equal;
446 }
447
448 template<std::endian E>
449 bool operator<(const EndianSlice<std::add_const_t<T>, E> &o) const noexcept {
450 return (*this <=> o) == std::strong_ordering::less;
451 }
452
453 template<std::endian E>
454 bool operator>(const EndianSlice<std::add_const_t<T>, E> &o) const noexcept {
455 return (*this <=> o) == std::strong_ordering::greater;
456 }
457
458 template<std::endian E>
459 bool operator<=(const EndianSlice<std::add_const_t<T>, E> &o) const noexcept {
460 const auto cmp = *this <=> o;
461 return (cmp == std::strong_ordering::less || cmp == std::strong_ordering::equal);
462 }
463
464 template<std::endian E>
465 bool operator>=(const EndianSlice<std::add_const_t<T>, E> &o) const noexcept {
466 const auto cmp = *this <=> o;
467 return (cmp == std::strong_ordering::greater || cmp == std::strong_ordering::equal);
468 }
469 };
470
471 namespace impl {
472 template<typename T, std::endian E>
473 struct EndianSliceWriterImpl {
474 static_assert(!std::is_const_v<T>, "Cannot write to const pointer!");
475 EndianSlice<T, E> out;
476 size_t curWriteIndex = 0;
477
478 using WriteElem = T;
479 using ErrType = io::SliceWriteError;
480
481 Result<size_t, ErrType> write(Slice<std::add_const_t<T>> bytes) {
482 size_t i = 0;
483 for (; i < bytes.size() && curWriteIndex < out.size(); ++i, ++curWriteIndex) {
484 out[curWriteIndex] = bytes[i];
485 }
486 return success(i);
487 }
488
489 template<std::endian E2>
490 Result<size_t, ErrType> write(EndianSlice<std::add_const_t<T>, E2> bytes) {
491 size_t i = 0;
492 for (; i < bytes.size() && curWriteIndex < out.size(); ++i, ++curWriteIndex) {
493 out[curWriteIndex] = bytes[i].native();
494 }
495 return success(i);
496 }
497 };
498 } // namespace impl
499
524
553
558 inline SliceU16Native endian_from_native_slice(Slice<char16_t> s) { return {.head = s.head, .len = s.len}; }
563 inline SliceU32Native endian_from_native_slice(Slice<char32_t> s) { return {.head = s.head, .len = s.len}; }
564
569 inline SliceU16Le endian_from_le_slice(Slice<char16_t> s) { return {.head = s.head, .len = s.len}; }
574 inline SliceU32Le endian_from_le_slice(Slice<char32_t> s) { return {.head = s.head, .len = s.len}; }
575
580 inline SliceU16Be endian_from_be_slice(Slice<char16_t> s) { return {.head = s.head, .len = s.len}; }
585 inline SliceU32Be endian_from_be_slice(Slice<char32_t> s) { return {.head = s.head, .len = s.len}; }
586
591 inline SliceConstU16Native endian_from_native_slice(Slice<const char16_t> s) {
592 return {.head = s.head, .len = s.len};
593 }
594 /**
595 * Gets a const UTF-32 endian slice from a slice which is assumed to be native endian
596 * @ingroup unicode_enc
597 */
599 return {.head = s.head, .len = s.len};
600 }
601
606 inline SliceConstU16Le endian_from_le_slice(Slice<const char16_t> s) { return {.head = s.head, .len = s.len}; }
611 inline SliceConstU32Le endian_from_le_slice(Slice<const char32_t> s) { return {.head = s.head, .len = s.len}; }
612
617 inline SliceConstU16Be endian_from_be_slice(Slice<const char16_t> s) { return {.head = s.head, .len = s.len}; }
622 inline SliceConstU32Be endian_from_be_slice(Slice<const char32_t> s) { return {.head = s.head, .len = s.len}; }
623
632 inline std::variant<SliceU16Le, SliceU16Be> endian_from_slice(Slice<char16_t> s) {
633 if (s.size() > 0) {
634 if (auto detect = u16_detect_endian(s[0]); detect.has_value()) {
635 auto subbed = s.sub(1);
636 if (detect.value() == std::endian::little) {
637 return SliceU16Le{.head = subbed.head, .len = subbed.len};
638 }
639 else {
640 return SliceU16Be{.head = subbed.head, .len = subbed.len};
641 }
642 }
643 }
644 return SliceU16Native{.head = s.head, .len = s.len};
645 }
646
655 inline std::variant<SliceU32Le, SliceU32Be> endian_from_slice(Slice<char32_t> s) {
656 if (s.size() > 0) {
657 if (auto detect = u16_detect_endian(s[0]); detect.has_value()) {
658 auto subbed = s.sub(1);
659 if (detect.value() == std::endian::little) {
660 return SliceU32Le{.head = subbed.head, .len = subbed.len};
661 }
662 else {
663 return SliceU32Be{.head = subbed.head, .len = subbed.len};
664 }
665 }
666 }
667 return SliceU32Native{.head = s.head, .len = s.len};
668 }
669
678 inline std::variant<SliceConstU16Le, SliceConstU16Be> endian_from_slice(Slice<const char16_t> s) {
679 if (s.size() > 0) {
680 if (auto detect = u16_detect_endian(s[0]); detect.has_value()) {
681 auto subbed = s.sub(1);
682 if (detect.value() == std::endian::little) {
683 return SliceConstU16Le{.head = subbed.head, .len = subbed.len};
684 }
685 else {
686 return SliceConstU16Be{.head = subbed.head, .len = subbed.len};
687 }
688 }
689 }
690 return SliceConstU16Native{.head = s.head, .len = s.len};
691 }
692
701 inline std::variant<SliceConstU32Le, SliceConstU32Be> endian_from_slice(Slice<const char32_t> s) {
702 if (s.size() > 0) {
703 if (auto detect = u16_detect_endian(s[0]); detect.has_value()) {
704 auto subbed = s.sub(1);
705 if (detect.value() == std::endian::little) {
706 return SliceConstU32Le{.head = subbed.head, .len = subbed.len};
707 }
708 else {
709 return SliceConstU32Be{.head = subbed.head, .len = subbed.len};
710 }
711 }
712 }
713 return SliceConstU32Native{.head = s.head, .len = s.len};
714 }
715
720 inline Slice<char16_t> slice_from_native_endian(SliceU16Native s) { return {.head = s.head, .len = s.len}; }
726 return {.head = s.head, .len = s.len};
727 }
728
732 inline Slice<char32_t> slice_from_native_endian(SliceU32Native s) { return {.head = s.head, .len = s.len}; }
738 return {.head = s.head, .len = s.len};
739 }
740
745 inline Slice<char16_t> slice_from_le_endian(SliceU16Native s) { return {.head = s.head, .len = s.len}; }
750 inline Slice<const char16_t> slice_from_le_endian(SliceConstU16Native s) { return {.head = s.head, .len = s.len}; }
755 inline Slice<char32_t> slice_from_le_endian(SliceU32Native s) { return {.head = s.head, .len = s.len}; }
760 inline Slice<const char32_t> slice_from_le_endian(SliceConstU32Native s) { return {.head = s.head, .len = s.len}; }
761
766 inline Slice<char16_t> slice_from_be_endian(SliceU16Native s) { return {.head = s.head, .len = s.len}; }
771 inline Slice<const char16_t> slice_from_be_endian(SliceConstU16Native s) { return {.head = s.head, .len = s.len}; }
776 inline Slice<char32_t> slice_from_be_endian(SliceU32Native s) { return {.head = s.head, .len = s.len}; }
781 inline Slice<const char32_t> slice_from_be_endian(SliceConstU32Native s) { return {.head = s.head, .len = s.len}; }
782
// A rune is held in a char32_t; the rune iterators in this file yield this type.
783 using Rune = char32_t;
784
800
809
810 namespace impl::unicode {
811 constexpr char16_t utf16HighOffset = 0xD800;
812 constexpr char16_t utf16LowOffset = 0xDC00;
813 constexpr char32_t utf16Sub = 0x10000;
814
815 constexpr uint8_t continueFlag = 0b10000000;
816
817 constexpr auto runeMasks = std::array{
818 static_cast<u32>(0b111110000000000000000),
819 static_cast<u32>(0b000001111100000000000),
820 static_cast<u32>(0b000000000011110000000),
821 };
822
823 constexpr auto utf8StartFlags = std::array<std::tuple<uint8_t, int, uint8_t>, 5>{
824 std::make_tuple(0b11110000, 4, 0b00000111), std::make_tuple(0b11100000, 3, 0b00001111),
825 std::make_tuple(0b11000000, 2, 0b00011111), std::make_tuple(continueFlag, -1, 0b00111111),
826 std::make_tuple(0b00000000, 1, 0b01111111),
827 };
828
829 constexpr auto num_bytes(char8_t byte) -> int {
830 for (const auto &[flagSet, bytes, _flag_removal]: utf8StartFlags) {
831 if (byte >= flagSet) {
832 return bytes;
833 }
834 }
835 return -1;
836 }
837
838 constexpr auto without_flag(char8_t byte) -> uint8_t {
839 for (const auto &[flagSet, _bytes, removeFlag]: utf8StartFlags) {
840 if (byte >= flagSet) {
841 return byte & removeFlag;
842 }
843 }
844 return byte;
845 }
846
847 constexpr auto get_flag(char8_t byte) -> uint8_t {
848 for (const auto &[flagSet, _bytes, _removeFlag]: utf8StartFlags) {
849 if (byte >= flagSet) {
850 return flagSet;
851 }
852 }
853 return 0;
854 }
855 } // namespace impl::unicode
856
861
866
871 template<typename T>
873 using IterElem = Rune;
875 size_t strIndex = 0;
876
879 if (strIndex >= data.size()) {
880 return nullopt;
881 }
882 auto curIndex = strIndex;
883 char32_t currentRune = 0;
884 auto numBytes = impl::unicode::num_bytes(data[curIndex]);
885
886 if (numBytes < 0) {
887 ++strIndex;
888 }
889 else if (curIndex + numBytes - 1 >= data.size()) {
890 strIndex = data.size();
891 }
892 else {
893 strIndex = curIndex + numBytes;
894 for (size_t index = curIndex; index < strIndex; ++index) {
895 auto flag = impl::unicode::get_flag(data[index]);
896 if (index > curIndex && flag != impl::unicode::continueFlag) {
897 strIndex = curIndex + index - 1;
898 currentRune = 0;
899 break;
900 }
901 auto noFlag = impl::unicode::without_flag(data[index]);
902 currentRune <<= 6;
903 currentRune |= noFlag;
904 }
905 }
906 return currentRune;
907 }
908 };
909
914 template<>
915 struct RuneIterator<char32_t> {
916 using IterElem = char32_t;
918 size_t strIndex = 0;
919 bool reverseBom = false;
920
922 if (strIndex >= data.len) {
923 return nullopt;
924 }
925 const auto res = data[strIndex++];
926 if (reverseBom) {
927 return flip_endian_ch32(res);
928 }
929 return res;
930 }
931 };
932
938 template<>
939 struct RuneIterator<char16_t> {
940 using IterElem = char32_t;
942 size_t strIndex = 0;
943 bool reverseBom = false;
944
946 if (strIndex >= data.size()) {
947 return nullopt;
948 }
949 size_t curIndex = strIndex;
950 char32_t currentRune = 0;
951
952 auto ch = data[curIndex];
953 if (reverseBom) {
954 ch = flip_endian_ch16(ch);
955 }
956
957 if (is_high_surrogate(ch)) {
958 if (curIndex + 1 >= data.size()) {
959 strIndex = data.size();
960 return currentRune;
961 }
962 else {
963 auto chNext = data[curIndex + 1];
964 if (reverseBom) {
965 chNext = flip_endian_ch16(chNext);
966 }
967
968 const auto high = (ch - impl::unicode::utf16HighOffset) * 0x400;
969 const auto low = chNext - impl::unicode::utf16LowOffset;
970 currentRune = high + low + impl::unicode::utf16Sub;
971 strIndex = curIndex + 2;
972 }
973 }
974 else {
975 if (!is_low_surrogate(ch)) {
976 currentRune = static_cast<char32_t>(ch);
977 if (reverseBom) {
978 flip_endian_ch16(currentRune);
979 }
980 }
981 ++strIndex;
982 }
983 return currentRune;
984 }
985 };
986
991 inline RuneIterator<char> str_rune_iterator(const Slice<const char> &s) { return {.data = s, .strIndex = 0}; }
992
997 inline RuneIterator<char8_t> utf8_rune_iterator(const Slice<const char8_t> &s) { return {.data = s, .strIndex = 0}; }
998
1004 if (s.size()) {
1005 if (const auto encoding = u16_detect_endian(s[0]); encoding.has_value()) {
1006 return {.data = s.sub(1), .strIndex = 0, .reverseBom = encoding == endian::opposite};
1007 }
1008 }
1009 return {.data = s, .strIndex = 0};
1010 }
1011
1017 return {
1018 .data = {.head = s.head, .len = s.len}, .strIndex = 0, .reverseBom = std::endian::native != std::endian::little};
1019 }
1020
1026 return {
1027 .data = {.head = s.head, .len = s.len}, .strIndex = 0, .reverseBom = std::endian::native != std::endian::big};
1028 }
1029
1035 return {
1036 .data = {.head = s.head, .len = s.len}, .strIndex = 0, .reverseBom = std::endian::native != std::endian::little};
1037 }
1038
1044 return {
1045 .data = {.head = s.head, .len = s.len}, .strIndex = 0, .reverseBom = std::endian::native != std::endian::big};
1046 }
1047
1053 if (s.size()) {
1054 if (const auto encoding = u32_detect_endian(s[0]); encoding.has_value()) {
1055 return {.data = s.sub(1), .strIndex = 0, .reverseBom = encoding == endian::opposite};
1056 }
1057 }
1058 return {.data = s, .strIndex = 0};
1059 }
1060
1066 return {
1067 .data = {.head = s.head, .len = s.len}, .strIndex = 0, .reverseBom = std::endian::native != std::endian::little};
1068 }
1069
1075 return {
1076 .data = {.head = s.head, .len = s.len}, .strIndex = 0, .reverseBom = std::endian::native != std::endian::big};
1077 }
1078
1084 return {
1085 .data = {.head = s.head, .len = s.len}, .strIndex = 0, .reverseBom = std::endian::native != std::endian::little};
1086 }
1087
1093 return {
1094 .data = {.head = s.head, .len = s.len}, .strIndex = 0, .reverseBom = std::endian::native != std::endian::big};
1095 }
1096
1107 template<WriterImpl WI>
1108 auto utf32_to_utf16(io::Writer<WI> &writer, const SliceConstU32Be &str, bool writeBom = false)
1110 char32_t cur;
1111 auto iter = utf32_rune_iterator(str);
1112 size_t written = 0;
1113
1114 if (writeBom) {
1115 if (auto res = writer.write(bom); res.is_error()) {
1116 return res.error();
1117 }
1118 ++written;
1119 }
1120
1121 while (iter.next().copy_if_present(cur)) {
1122 auto seq = rune_to_utf16(cur);
1123 char16_t curCh;
1124 auto iter2 = seq.iter();
1125 while (iter2.next().copy_if_present(curCh)) {
1126 if (auto writeRes = writer.write(curCh); writeRes.is_error()) {
1127 return writeRes.error();
1128 }
1129 ++written;
1130 }
1131 }
1132 return success(written);
1133 }
1134
1145 template<WriterImpl WI>
1146 auto utf32_to_utf16(io::Writer<WI> &writer, const SliceConstU32Le &str, bool writeBom = false)
1148 char32_t cur;
1149 auto iter = utf32_rune_iterator(str);
1150 size_t written = 0;
1151
1152 if (writeBom) {
1153 if (auto res = writer.write(bom); res.is_error()) {
1154 return res.error();
1155 }
1156 ++written;
1157 }
1158
1159 while (iter.next().copy_if_present(cur)) {
1160 auto seq = rune_to_utf16(cur);
1161 char16_t curCh;
1162 auto iter2 = seq.iter();
1163 while (iter2.next().copy_if_present(curCh)) {
1164 if (auto writeRes = writer.write(curCh); writeRes.is_error()) {
1165 return writeRes.error();
1166 }
1167 ++written;
1168 }
1169 }
1170 return success(written);
1171 }
1172
1183 template<WriterImpl WI>
1184 auto utf32_to_utf16(io::Writer<WI> &writer, const Slice<const char32_t> &str, bool writeBom = false)
1186 return utf32_to_utf16(writer, endian_from_native_slice(str), writeBom);
1187 }
1188
1197 template<WriterImpl WI>
1200 char32_t cur;
1201 auto iter = utf32_rune_iterator(str);
1202 size_t written = 0;
1203 while (iter.next().copy_if_present(cur)) {
1204 auto seq = rune_to_utf8(cur);
1205 char8_t curCh;
1206 auto iter2 = seq.iter();
1207 while (iter2.next().copy_if_present(curCh)) {
1208 if (auto writeRes = writer.write(curCh); writeRes.is_error()) {
1209 return writeRes.error();
1210 }
1211 ++written;
1212 }
1213 }
1214 return success(written);
1215 }
1216
1225 template<WriterImpl WI>
1228 char32_t cur;
1229 auto iter = utf32_rune_iterator(str);
1230 size_t written = 0;
1231 while (iter.next().copy_if_present(cur)) {
1232 auto seq = rune_to_utf8(cur);
1233 char8_t curCh;
1234 auto iter2 = seq.iter();
1235 while (iter2.next().copy_if_present(curCh)) {
1236 if (auto writeRes = writer.write(curCh); writeRes.is_error()) {
1237 return writeRes.error();
1238 }
1239 ++written;
1240 }
1241 }
1242 return success(written);
1243 }
1244
1253 template<WriterImpl WI>
1258
1267 template<WriterImpl WI>
1270 char32_t cur;
1271 auto iter = utf32_rune_iterator(str);
1272 size_t written = 0;
1273 while (iter.next().copy_if_present(cur)) {
1274 auto seq = rune_to_str(cur);
1275 char curCh;
1276 auto iter2 = seq.iter();
1277 while (iter2.next().copy_if_present(curCh)) {
1278 if (auto writeRes = writer.write(curCh); writeRes.is_error()) {
1279 return writeRes.error();
1280 }
1281 ++written;
1282 }
1283 }
1284 return success(written);
1285 }
1286
1295 template<WriterImpl WI>
1298 char32_t cur;
1299 auto iter = utf32_rune_iterator(str);
1300 size_t written = 0;
1301 while (iter.next().copy_if_present(cur)) {
1302 auto seq = rune_to_str(cur);
1303 char curCh;
1304 auto iter2 = seq.iter();
1305 while (iter2.next().copy_if_present(curCh)) {
1306 if (auto writeRes = writer.write(curCh); writeRes.is_error()) {
1307 return writeRes.error();
1308 }
1309 ++written;
1310 }
1311 }
1312 return success(written);
1313 }
1314
1323 template<WriterImpl WI>
1328
1338 template<WriterImpl WI>
1339 auto utf8_to_utf32(io::Writer<WI> &writer, const Slice<const char8_t> &str, bool writeBom = false)
1341 auto runeIter = utf8_rune_iterator(str);
1342 char32_t rune;
1343 size_t written = 0;
1344
1345 if (writeBom) {
1346 if (auto res = writer.write(bom); res.is_error()) {
1347 return res.error();
1348 }
1349 ++written;
1350 }
1351
1352 while (runeIter.next().copy_if_present(rune)) {
1353 if (auto writeRes = writer.write(rune); writeRes.is_error()) {
1354 return writeRes.error();
1355 }
1356 ++written;
1357 }
1358 return success(written);
1359 }
1360
1370 template<WriterImpl WI>
1371 auto str_to_utf32(io::Writer<WI> &writer, const Slice<const char> &str, bool writeBom = false)
1373 auto runeIter = str_rune_iterator(str);
1374 char32_t rune;
1375 size_t written = 0;
1376
1377 if (writeBom) {
1378 if (auto res = writer.write(bom); res.is_error()) {
1379 return res.error();
1380 }
1381 ++written;
1382 }
1383
1384 while (runeIter.next().copy_if_present(rune)) {
1385 if (auto writeRes = writer.write(rune); writeRes.is_error()) {
1386 return writeRes.error();
1387 }
1388 ++written;
1389 }
1390 return success(written);
1391 }
1392
1401 template<WriterImpl WI>
1404 auto runeIter = utf16_rune_iterator(str);
1405 char32_t rune;
1406 size_t written = 0;
1407 while (runeIter.next().copy_if_present(rune)) {
1408 auto seq = rune_to_utf8(rune);
1409 char8_t curCh;
1410 auto charIter = seq.iter();
1411 while (charIter.next().copy_if_present(curCh)) {
1412 if (auto writeRes = writer.write(curCh); writeRes.is_error()) {
1413 return writeRes.error();
1414 }
1415 ++written;
1416 }
1417 }
1418 return success(written);
1419 }
1420
1421
1431 template<WriterImpl WI>
1432 auto utf16_to_utf32(io::Writer<WI> &writer, const SliceConstU16Le &str, bool writeBom = false)
1434 auto runeIter = utf16_rune_iterator(str);
1435 char32_t rune;
1436 size_t written = 0;
1437
1438 if (writeBom) {
1439 if (auto res = writer.write(bom); res.is_error()) {
1440 return res.error();
1441 }
1442 ++written;
1443 }
1444
1445 while (runeIter.next().copy_if_present(rune)) {
1446 if (auto writeres = writer.write(rune); writeres.is_error()) {
1447 return writeres.error();
1448 }
1449 ++written;
1450 }
1451 return success(written);
1452 }
1453
1463 template<WriterImpl WI>
1464 auto utf16_to_utf32(io::Writer<WI> &writer, const SliceConstU16Be &str, bool writeBom = false)
1466 auto runeIter = utf16_rune_iterator(str);
1467 char32_t rune;
1468 size_t written = 0;
1469
1470 if (writeBom) {
1471 if (auto res = writer.write(bom); res.is_error()) {
1472 return res.error();
1473 }
1474 ++written;
1475 }
1476
1477 while (runeIter.next().copy_if_present(rune)) {
1478 if (auto writeres = writer.write(rune); writeres.is_error()) {
1479 return writeres.error();
1480 }
1481 ++written;
1482 }
1483 return success(written);
1484 }
1485
1495 template<WriterImpl WI>
1496 auto utf16_to_utf32(io::Writer<WI> &writer, const Slice<const char16_t> &str, bool writeBom = false)
1498 return utf16_to_utf32(writer, endian_from_native_slice(str), writeBom);
1499 }
1500
1509 template<WriterImpl WI>
1512 auto runeIter = utf16_rune_iterator(str);
1513 char32_t rune;
1514 size_t written = 0;
1515 while (runeIter.next().copy_if_present(rune)) {
1516 auto seq = rune_to_utf8(rune);
1517 char8_t curCh;
1518 auto charIter = seq.iter();
1519 while (charIter.next().copy_if_present(curCh)) {
1520 if (auto writeRes = writer.write(curCh); writeRes.is_error()) {
1521 return writeRes.error();
1522 }
1523 ++written;
1524 }
1525 }
1526 return success(written);
1527 }
1528
1537 template<WriterImpl WI>
1540 auto runeIter = utf16_rune_iterator(str);
1541 char32_t rune;
1542 size_t written = 0;
1543 while (runeIter.next().copy_if_present(rune)) {
1544 auto seq = rune_to_str(rune);
1545 char curCh;
1546 auto charIter = seq.iter();
1547 while (charIter.next().copy_if_present(curCh)) {
1548 if (auto writeRes = writer.write(curCh); writeRes.is_error()) {
1549 return writeRes.error();
1550 }
1551 ++written;
1552 }
1553 }
1554 return success(written);
1555 }
1556
1565 template<WriterImpl WI>
1568 auto runeIter = utf16_rune_iterator(str);
1569 char32_t rune;
1570 size_t written = 0;
1571 while (runeIter.next().copy_if_present(rune)) {
1572 auto seq = rune_to_str(rune);
1573 char curCh;
1574 auto charIter = seq.iter();
1575 while (charIter.next().copy_if_present(curCh)) {
1576 if (auto writeRes = writer.write(curCh); writeRes.is_error()) {
1577 return writeRes.error();
1578 }
1579 ++written;
1580 }
1581 }
1582 return success(written);
1583 }
1584
1593 template<WriterImpl WI>
1598
1607 template<WriterImpl WI>
1612
1622 template<WriterImpl WI>
1623 auto utf8_to_utf16(io::Writer<WI> &writer, const Slice<const char8_t> &str, bool writeBom = false)
1625 auto runeIter = utf8_rune_iterator(str);
1626 char32_t rune;
1627 size_t written = 0;
1628
1629 if (writeBom) {
1630 if (auto res = writer.write(bom); res.is_error()) {
1631 return res.error();
1632 }
1633 ++written;
1634 }
1635
1636 while (runeIter.next().copy_if_present(rune)) {
1637 auto seq = rune_to_utf16(rune);
1638 char16_t curCh;
1639 auto charIter = seq.iter();
1640 while (charIter.next().copy_if_present(curCh)) {
1641 if (auto writeRes = writer.write(curCh); writeRes.is_error()) {
1642 return writeRes.error();
1643 }
1644 ++written;
1645 }
1646 }
1647 return success(written);
1648 }
1649
1659 template<WriterImpl WI>
1660 auto str_to_utf16(io::Writer<WI> &writer, const Slice<const char> &str, bool writeBom = false)
1662 auto runeIter = str_rune_iterator(str);
1663 char32_t rune;
1664 size_t written = 0;
1665
1666 if (writeBom) {
1667 if (auto res = writer.write(bom); res.is_error()) {
1668 return res.error();
1669 }
1670 ++written;
1671 }
1672
1673 while (runeIter.next().copy_if_present(rune)) {
1674 auto seq = rune_to_utf16(rune);
1675 char16_t curCh;
1676 auto charIter = seq.iter();
1677 while (charIter.next().copy_if_present(curCh)) {
1678 if (auto writeRes = writer.write(curCh); writeRes.is_error()) {
1679 return writeRes.error();
1680 }
1681 ++written;
1682 }
1683 }
1684 return success(written);
1685 }
1686
1695 template<WriterImpl WI>
1698 auto runeIter = utf8_rune_iterator(str);
1699 char32_t rune;
1700 size_t written = 0;
1701 while (runeIter.next().copy_if_present(rune)) {
1702 auto seq = rune_to_str(rune);
1703 char curCh;
1704 auto charIter = seq.iter();
1705 while (charIter.next().copy_if_present(curCh)) {
1706 if (auto writeRes = writer.write(curCh); writeRes.is_error()) {
1707 return writeRes.error();
1708 }
1709 ++written;
1710 }
1711 }
1712 return success(written);
1713 }
1714
/**
 * Re-encodes a char sequence as UTF-8 (`char8_t`) elements written to
 * `writer`.
 *
 * NOTE(review): source lines 1724-1725 (the function signature, return type
 * and opening brace) are absent from this listing. Judging by the body
 * (str_rune_iterator + rune_to_utf8) this is presumably str_to_utf8, taking
 * `writer` and `str` - confirm against the header.
 *
 * Each rune read from `str` is encoded by rune_to_utf8 and written one
 * char8_t at a time.
 *
 * @tparam WI writer implementation wrapped by io::Writer
 * @return success(written), where written is the number of char8_t elements
 *         written; the writer's error on the first failed write
 */
1723 template<WriterImpl WI>
1726 auto runeIter = str_rune_iterator(str);
1727 char32_t rune;
1728 size_t written = 0;
// No BOM handling here: the loop only emits the converted runes.
1729 while (runeIter.next().copy_if_present(rune)) {
1730 auto seq = rune_to_utf8(rune);
1731 char8_t curCh;
1732 auto charIter = seq.iter();
1733 while (charIter.next().copy_if_present(curCh)) {
// A failed write aborts the conversion and surfaces the writer's error.
1734 if (auto writeRes = writer.write(curCh); writeRes.is_error()) {
1735 return writeRes.error();
1736 }
1737 ++written;
1738 }
1739 }
1740 return success(written);
1741 }
1742
/**
 * Converts native-endian UTF-16 to little-endian UTF-16, writing one code
 * unit at a time to `writer`.
 *
 * The input is decoded to runes with utf16_rune_iterator and re-encoded with
 * rune_to_utf16; each code unit is byte-swapped when the host is not little
 * endian.
 *
 * NOTE(review): source line 1754 (trailing return type and opening brace) is
 * absent from this listing.
 *
 * @tparam WI writer implementation wrapped by io::Writer
 * @param writer destination of the little-endian code units
 * @param str native-endian UTF-16 input
 * @param writeBom when true, endian_bom<std::endian::little, char16_t> is
 *        written first
 * @return success(written), where written counts successful write() calls
 *         (BOM included); the writer's error on the first failed write
 */
1752 template<WriterImpl WI>
1753 auto utf16_to_utf16le(io::Writer<WI> &writer, const SliceConstU16Native &str, bool writeBom = false)
1755 auto runeIter = utf16_rune_iterator(str);
1756 char32_t rune;
1757 size_t written = 0;
1758
// endian_bom already holds the BOM in the target byte order (it is the
// flipped `bom` whenever the target differs from std::endian::native).
1759 if (writeBom) {
1760 if (auto res = writer.write(endian_bom<std::endian::little, char16_t>); res.is_error()) {
1761 return res.error();
1762 }
1763 ++written;
1764 }
1765
1766 while (runeIter.next().copy_if_present(rune)) {
1767 auto seq = rune_to_utf16(rune);
1768 char16_t cur;
1769 auto seqIter = seq.iter();
1770 while (seqIter.next().copy_if_present(cur)) {
// Compile-time choice: swap bytes only on hosts that are not little endian
// (assumes rune_to_utf16 yields native-order units - TODO confirm).
1771 if constexpr (std::endian::native != std::endian::little) {
1772 cur = flip_endian(cur);
1773 }
1774 if (auto writeRes = writer.write(cur); writeRes.is_error()) {
1775 return writeRes.error();
1776 }
1777 ++written;
1778 }
1779 }
1780 return success(written);
1781 }
1782
/**
 * Convenience overload of utf16_to_utf16le for a plain char16_t slice.
 *
 * Wraps `str` with endian_from_native_slice and forwards `writer`, the
 * wrapped slice and `writeBom` to utf16_to_utf16le (presumably resolving to
 * the SliceConstU16Native overload). The result of that call is returned
 * unchanged.
 *
 * NOTE(review): source line 1794 (trailing return type and opening brace) is
 * absent from this listing.
 */
1792 template<WriterImpl WI>
1793 auto utf16_to_utf16le(io::Writer<WI> &writer, const Slice<const char16_t> &str, bool writeBom = false)
1795 return utf16_to_utf16le(writer, endian_from_native_slice(str), writeBom);
1796 }
1797
/**
 * Converts native-endian UTF-16 to big-endian UTF-16, writing one code unit
 * at a time to `writer`.
 *
 * The input is decoded to runes with utf16_rune_iterator and re-encoded with
 * rune_to_utf16; each code unit is byte-swapped when the host is not big
 * endian.
 *
 * NOTE(review): source line 1809 (trailing return type and opening brace) is
 * absent from this listing.
 *
 * @tparam WI writer implementation wrapped by io::Writer
 * @param writer destination of the big-endian code units
 * @param str native-endian UTF-16 input
 * @param writeBom when true, endian_bom<std::endian::big, char16_t> is
 *        written first
 * @return success(written), where written counts successful write() calls
 *         (BOM included); the writer's error on the first failed write
 */
1807 template<WriterImpl WI>
1808 auto utf16_to_utf16be(io::Writer<WI> &writer, const SliceConstU16Native &str, bool writeBom = false)
1810 auto runeIter = utf16_rune_iterator(str);
1811 char32_t rune;
1812 size_t written = 0;
1813
// endian_bom already holds the BOM in the target byte order (it is the
// flipped `bom` whenever the target differs from std::endian::native).
1814 if (writeBom) {
1815 if (auto res = writer.write(endian_bom<std::endian::big, char16_t>); res.is_error()) {
1816 return res.error();
1817 }
1818 ++written;
1819 }
1820
1821 while (runeIter.next().copy_if_present(rune)) {
1822 auto seq = rune_to_utf16(rune);
1823 char16_t cur;
1824 auto seqIter = seq.iter();
1825 while (seqIter.next().copy_if_present(cur)) {
// Compile-time choice: swap bytes only on hosts that are not big endian
// (assumes rune_to_utf16 yields native-order units - TODO confirm).
1826 if constexpr (std::endian::native != std::endian::big) {
1827 cur = flip_endian(cur);
1828 }
1829 if (auto writeRes = writer.write(cur); writeRes.is_error()) {
1830 return writeRes.error();
1831 }
1832 ++written;
1833 }
1834 }
1835 return success(written);
1836 }
1837
/**
 * Convenience overload of utf16_to_utf16be for a plain char16_t slice.
 *
 * Wraps `str` with endian_from_native_slice and forwards `writer`, the
 * wrapped slice and `writeBom` to utf16_to_utf16be (presumably resolving to
 * the SliceConstU16Native overload). The result of that call is returned
 * unchanged.
 *
 * NOTE(review): source line 1849 (trailing return type and opening brace) is
 * absent from this listing.
 */
1847 template<WriterImpl WI>
1848 auto utf16_to_utf16be(io::Writer<WI> &writer, const Slice<const char16_t> &str, bool writeBom = false)
1850 return utf16_to_utf16be(writer, endian_from_native_slice(str), writeBom);
1851 }
1852
/**
 * Converts little-endian UTF-16 to big-endian UTF-16, writing one code unit
 * at a time to `writer`.
 *
 * The little-endian slice is decoded to runes by utf16_rune_iterator, each
 * rune is re-encoded with rune_to_utf16, and the units are byte-swapped when
 * the host is not big endian.
 *
 * NOTE(review): source line 1864 (trailing return type and opening brace) is
 * absent from this listing.
 *
 * @tparam WI writer implementation wrapped by io::Writer
 * @param writer destination of the big-endian code units
 * @param str little-endian UTF-16 input
 * @param writeBom when true, endian_bom<std::endian::big, char16_t> is
 *        written first
 * @return success(written), where written counts successful write() calls
 *         (BOM included); the writer's error on the first failed write
 */
1862 template<WriterImpl WI>
1863 auto utf16le_to_utf16be(io::Writer<WI> &writer, const SliceConstU16Le &str, bool writeBom = false)
1865 auto runeIter = utf16_rune_iterator(str);
1866 char32_t rune;
1867 size_t written = 0;
1868
1869 if (writeBom) {
1870 if (auto res = writer.write(endian_bom<std::endian::big, char16_t>); res.is_error()) {
1871 return res.error();
1872 }
1873 ++written;
1874 }
1875
// Input byte order is not handled in this function; presumably the rune
// iterator built from the endian-tagged slice yields native rune values.
1876 while (runeIter.next().copy_if_present(rune)) {
1877 auto seq = rune_to_utf16(rune);
1878 char16_t cur;
1879 auto seqIter = seq.iter();
1880 while (seqIter.next().copy_if_present(cur)) {
// Output side: swap bytes unless the host already is big endian.
1881 if constexpr (std::endian::native != std::endian::big) {
1882 cur = flip_endian(cur);
1883 }
1884 if (auto writeRes = writer.write(cur); writeRes.is_error()) {
1885 return writeRes.error();
1886 }
1887 ++written;
1888 }
1889 }
1890 return success(written);
1891 }
1892
/**
 * Converts big-endian UTF-16 to little-endian UTF-16, writing one code unit
 * at a time to `writer`.
 *
 * The big-endian slice is decoded to runes by utf16_rune_iterator, each rune
 * is re-encoded with rune_to_utf16, and the units are byte-swapped when the
 * host is not little endian.
 *
 * NOTE(review): source line 1904 (trailing return type and opening brace) is
 * absent from this listing.
 *
 * @tparam WI writer implementation wrapped by io::Writer
 * @param writer destination of the little-endian code units
 * @param str big-endian UTF-16 input
 * @param writeBom when true, endian_bom<std::endian::little, char16_t> is
 *        written first
 * @return success(written), where written counts successful write() calls
 *         (BOM included); the writer's error on the first failed write
 */
1902 template<WriterImpl WI>
1903 auto utf16be_to_utf16le(io::Writer<WI> &writer, const SliceConstU16Be &str, bool writeBom = false)
1905 auto runeIter = utf16_rune_iterator(str);
1906 char32_t rune;
1907 size_t written = 0;
1908
1909 if (writeBom) {
1910 if (auto res = writer.write(endian_bom<std::endian::little, char16_t>); res.is_error()) {
1911 return res.error();
1912 }
1913 ++written;
1914 }
1915
// Input byte order is not handled in this function; presumably the rune
// iterator built from the endian-tagged slice yields native rune values.
1916 while (runeIter.next().copy_if_present(rune)) {
1917 auto seq = rune_to_utf16(rune);
1918 char16_t cur;
1919 auto seqIter = seq.iter();
1920 while (seqIter.next().copy_if_present(cur)) {
// Output side: swap bytes unless the host already is little endian.
1921 if constexpr (std::endian::native != std::endian::little) {
1922 cur = flip_endian(cur);
1923 }
1924 if (auto writeRes = writer.write(cur); writeRes.is_error()) {
1925 return writeRes.error();
1926 }
1927 ++written;
1928 }
1929 }
1930 return success(written);
1931 }
1932
/**
 * Converts little-endian UTF-16 to native-endian UTF-16, writing one code
 * unit at a time to `writer`.
 *
 * Runes come from utf16_rune_iterator over the little-endian slice and are
 * re-encoded with rune_to_utf16; the resulting units are written without any
 * byte swap in this function.
 *
 * NOTE(review): source line 1944 (trailing return type and opening brace) is
 * absent from this listing.
 *
 * @tparam WI writer implementation wrapped by io::Writer
 * @param writer destination of the native-endian code units
 * @param str little-endian UTF-16 input
 * @param writeBom when true, endian_bom<std::endian::native, char16_t> is
 *        written first
 * @return success(written), where written counts successful write() calls
 *         (BOM included); the writer's error on the first failed write
 */
1942 template<WriterImpl WI>
1943 auto utf16le_to_utf16(io::Writer<WI> &writer, const SliceConstU16Le &str, bool writeBom = false)
1945 auto runeIter = utf16_rune_iterator(str);
1946 char32_t rune;
1947 size_t written = 0;
1948
// With target == native, endian_bom evaluates to the unflipped `bom`.
1949 if (writeBom) {
1950 if (auto res = writer.write(endian_bom<std::endian::native, char16_t>); res.is_error()) {
1951 return res.error();
1952 }
1953 ++written;
1954 }
1955
// Any little-endian handling of the input happens inside the rune iterator
// (not visible here); the output below is written as produced.
1956 while (runeIter.next().copy_if_present(rune)) {
1957 auto seq = rune_to_utf16(rune);
1958 char16_t cur;
1959 auto seqIter = seq.iter();
1960 while (seqIter.next().copy_if_present(cur)) {
1961 if (auto writeRes = writer.write(cur); writeRes.is_error()) {
1962 return writeRes.error();
1963 }
1964 ++written;
1965 }
1966 }
1967 return success(written);
1968 }
1969
/**
 * Converts big-endian UTF-16 to native-endian UTF-16, writing one code unit
 * at a time to `writer`.
 *
 * Runes come from utf16_rune_iterator over the big-endian slice and are
 * re-encoded with rune_to_utf16; the resulting units are written without any
 * byte swap in this function.
 *
 * NOTE(review): source line 1981 (trailing return type and opening brace) is
 * absent from this listing.
 *
 * @tparam WI writer implementation wrapped by io::Writer
 * @param writer destination of the native-endian code units
 * @param str big-endian UTF-16 input
 * @param writeBom when true, endian_bom<std::endian::native, char16_t> is
 *        written first
 * @return success(written), where written counts successful write() calls
 *         (BOM included); the writer's error on the first failed write
 */
1979 template<WriterImpl WI>
1980 auto utf16be_to_utf16(io::Writer<WI> &writer, const SliceConstU16Be &str, bool writeBom = false)
1982 auto runeIter = utf16_rune_iterator(str);
1983 char32_t rune;
1984 size_t written = 0;
1985
// With target == native, endian_bom evaluates to the unflipped `bom`.
1986 if (writeBom) {
1987 if (auto res = writer.write(endian_bom<std::endian::native, char16_t>); res.is_error()) {
1988 return res.error();
1989 }
1990 ++written;
1991 }
1992
// Any big-endian handling of the input happens inside the rune iterator
// (not visible here); the output below is written as produced.
1993 while (runeIter.next().copy_if_present(rune)) {
1994 auto seq = rune_to_utf16(rune);
1995 char16_t cur;
1996 auto seqIter = seq.iter();
1997 while (seqIter.next().copy_if_present(cur)) {
1998 if (auto writeRes = writer.write(cur); writeRes.is_error()) {
1999 return writeRes.error();
2000 }
2001 ++written;
2002 }
2003 }
2004 return success(written);
2005 }
2006
/**
 * Converts native-endian UTF-32 to little-endian UTF-32, writing one rune at
 * a time to `writer`.
 *
 * Each rune from utf32_rune_iterator is byte-swapped when the host is not
 * little endian and then written as-is; no re-encoding step is involved.
 *
 * NOTE(review): source line 2018 (trailing return type and opening brace) is
 * absent from this listing.
 *
 * @tparam WI writer implementation wrapped by io::Writer
 * @param writer destination of the little-endian runes
 * @param str native-endian UTF-32 input
 * @param writeBom when true, endian_bom<std::endian::little, char32_t> is
 *        written first
 * @return success(written), where written counts successful write() calls
 *         (BOM included); the writer's error on the first failed write
 */
2016 template<WriterImpl WI>
2017 auto utf32_to_utf32le(io::Writer<WI> &writer, const SliceConstU32Native &str, bool writeBom = false)
2019 auto runeIter = utf32_rune_iterator(str);
2020 char32_t rune;
2021 size_t written = 0;
2022
// endian_bom already holds the BOM in the target byte order.
2023 if (writeBom) {
2024 if (auto res = writer.write(endian_bom<std::endian::little, char32_t>); res.is_error()) {
2025 return res.error();
2026 }
2027 ++written;
2028 }
2029
2030 while (runeIter.next().copy_if_present(rune)) {
// Compile-time choice: swap bytes only on hosts that are not little endian.
2031 if constexpr (std::endian::native != std::endian::little) {
2032 rune = flip_endian(rune);
2033 }
2034 if (auto writeRes = writer.write(rune); writeRes.is_error()) {
2035 return writeRes.error();
2036 }
2037 ++written;
2038 }
2039 return success(written);
2040 }
2041
/**
 * Convenience overload of utf32_to_utf32le for a plain char32_t slice.
 *
 * Wraps `str` with endian_from_native_slice and forwards `writer`, the
 * wrapped slice and `writeBom` to utf32_to_utf32le (presumably resolving to
 * the SliceConstU32Native overload). The result of that call is returned
 * unchanged.
 *
 * NOTE(review): source line 2053 (trailing return type and opening brace) is
 * absent from this listing.
 */
2051 template<WriterImpl WI>
2052 auto utf32_to_utf32le(io::Writer<WI> &writer, const Slice<const char32_t> &str, bool writeBom = false)
2054 return utf32_to_utf32le(writer, endian_from_native_slice(str), writeBom);
2055 }
2056
/**
 * Converts native-endian UTF-32 to big-endian UTF-32, writing one rune at a
 * time to `writer`.
 *
 * Each rune from utf32_rune_iterator is byte-swapped when the host is not big
 * endian and then written as-is; no re-encoding step is involved.
 *
 * NOTE(review): source line 2068 (trailing return type and opening brace) is
 * absent from this listing.
 *
 * @tparam WI writer implementation wrapped by io::Writer
 * @param writer destination of the big-endian runes
 * @param str native-endian UTF-32 input
 * @param writeBom when true, endian_bom<std::endian::big, char32_t> is
 *        written first
 * @return success(written), where written counts successful write() calls
 *         (BOM included); the writer's error on the first failed write
 */
2066 template<WriterImpl WI>
2067 auto utf32_to_utf32be(io::Writer<WI> &writer, const SliceConstU32Native &str, bool writeBom = false)
2069 auto runeIter = utf32_rune_iterator(str);
2070 char32_t rune;
2071 size_t written = 0;
2072
// endian_bom already holds the BOM in the target byte order.
2073 if (writeBom) {
2074 if (auto res = writer.write(endian_bom<std::endian::big, char32_t>); res.is_error()) {
2075 return res.error();
2076 }
2077 ++written;
2078 }
2079
2080 while (runeIter.next().copy_if_present(rune)) {
// Compile-time choice: swap bytes only on hosts that are not big endian.
2081 if constexpr (std::endian::native != std::endian::big) {
2082 rune = flip_endian(rune);
2083 }
2084 if (auto writeRes = writer.write(rune); writeRes.is_error()) {
2085 return writeRes.error();
2086 }
2087 ++written;
2088 }
2089 return success(written);
2090 }
2091
/**
 * Convenience overload of utf32_to_utf32be for a plain char32_t slice.
 *
 * Wraps `str` with endian_from_native_slice and forwards `writer`, the
 * wrapped slice and `writeBom` to utf32_to_utf32be (presumably resolving to
 * the SliceConstU32Native overload). The result of that call is returned
 * unchanged.
 *
 * NOTE(review): source line 2103 (trailing return type and opening brace) is
 * absent from this listing.
 */
2101 template<WriterImpl WI>
2102 auto utf32_to_utf32be(io::Writer<WI> &writer, const Slice<const char32_t> &str, bool writeBom = false)
2104 return utf32_to_utf32be(writer, endian_from_native_slice(str), writeBom);
2105 }
2106
/**
 * Converts little-endian UTF-32 to big-endian UTF-32, writing one rune at a
 * time to `writer`.
 *
 * Runes come from utf32_rune_iterator over the little-endian slice; each one
 * is byte-swapped when the host is not big endian and then written.
 *
 * NOTE(review): source line 2118 (trailing return type and opening brace) is
 * absent from this listing.
 *
 * @tparam WI writer implementation wrapped by io::Writer
 * @param writer destination of the big-endian runes
 * @param str little-endian UTF-32 input
 * @param writeBom when true, endian_bom<std::endian::big, char32_t> is
 *        written first
 * @return success(written), where written counts successful write() calls
 *         (BOM included); the writer's error on the first failed write
 */
2116 template<WriterImpl WI>
2117 auto utf32le_to_utf32be(io::Writer<WI> &writer, const SliceConstU32Le &str, bool writeBom = false)
2119 auto runeIter = utf32_rune_iterator(str);
2120 char32_t rune;
2121 size_t written = 0;
2122
2123 if (writeBom) {
2124 if (auto res = writer.write(endian_bom<std::endian::big, char32_t>); res.is_error()) {
2125 return res.error();
2126 }
2127 ++written;
2128 }
2129
// Input byte order is not handled in this function; presumably the rune
// iterator built from the endian-tagged slice yields native rune values.
2130 while (runeIter.next().copy_if_present(rune)) {
// Output side: swap bytes unless the host already is big endian.
2131 if constexpr (std::endian::native != std::endian::big) {
2132 rune = flip_endian(rune);
2133 }
2134 if (auto writeRes = writer.write(rune); writeRes.is_error()) {
2135 return writeRes.error();
2136 }
2137 ++written;
2138 }
2139 return success(written);
2140 }
2141
/**
 * Converts big-endian UTF-32 to little-endian UTF-32, writing one rune at a
 * time to `writer`.
 *
 * Runes come from utf32_rune_iterator over the big-endian slice; each one is
 * byte-swapped when the host is not little endian and then written.
 *
 * NOTE(review): source line 2153 (trailing return type and opening brace) is
 * absent from this listing.
 *
 * @tparam WI writer implementation wrapped by io::Writer
 * @param writer destination of the little-endian runes
 * @param str big-endian UTF-32 input
 * @param writeBom when true, endian_bom<std::endian::little, char32_t> is
 *        written first
 * @return success(written), where written counts successful write() calls
 *         (BOM included); the writer's error on the first failed write
 */
2151 template<WriterImpl WI>
2152 auto utf32be_to_utf32le(io::Writer<WI> &writer, const SliceConstU32Be &str, bool writeBom = false)
2154 auto runeIter = utf32_rune_iterator(str);
2155 char32_t rune;
2156 size_t written = 0;
2157
2158 if (writeBom) {
2159 if (auto res = writer.write(endian_bom<std::endian::little, char32_t>); res.is_error()) {
2160 return res.error();
2161 }
2162 ++written;
2163 }
2164
// Input byte order is not handled in this function; presumably the rune
// iterator built from the endian-tagged slice yields native rune values.
2165 while (runeIter.next().copy_if_present(rune)) {
// Output side: swap bytes unless the host already is little endian.
2166 if constexpr (std::endian::native != std::endian::little) {
2167 rune = flip_endian(rune);
2168 }
2169 if (auto writeRes = writer.write(rune); writeRes.is_error()) {
2170 return writeRes.error();
2171 }
2172 ++written;
2173 }
2174 return success(written);
2175 }
2176
/**
 * Converts little-endian UTF-32 to native-endian UTF-32, writing one rune at
 * a time to `writer`.
 *
 * Every rune yielded by utf32_rune_iterator over the little-endian slice is
 * written unchanged; this function performs no byte swap itself.
 *
 * NOTE(review): source line 2188 (trailing return type and opening brace) is
 * absent from this listing.
 *
 * @tparam WI writer implementation wrapped by io::Writer
 * @param writer destination of the native-endian runes
 * @param str little-endian UTF-32 input
 * @param writeBom when true, endian_bom<std::endian::native, char32_t> is
 *        written first
 * @return success(written), where written counts successful write() calls
 *         (BOM included); the writer's error on the first failed write
 */
2186 template<WriterImpl WI>
2187 auto utf32le_to_utf32(io::Writer<WI> &writer, const SliceConstU32Le &str, bool writeBom = false)
2189 auto runeIter = utf32_rune_iterator(str);
2190 char32_t rune;
2191 size_t written = 0;
2192
// With target == native, endian_bom evaluates to the unflipped `bom`.
2193 if (writeBom) {
2194 if (auto res = writer.write(endian_bom<std::endian::native, char32_t>); res.is_error()) {
2195 return res.error();
2196 }
2197 ++written;
2198 }
2199
// Any little-endian handling of the input happens inside the rune iterator
// (not visible here).
2200 while (runeIter.next().copy_if_present(rune)) {
2201 if (auto writeRes = writer.write(rune); writeRes.is_error()) {
2202 return writeRes.error();
2203 }
2204 ++written;
2205 }
2206 return success(written);
2207 }
2208
/**
 * Converts big-endian UTF-32 to native-endian UTF-32, writing one rune at a
 * time to `writer`.
 *
 * Every rune yielded by utf32_rune_iterator over the big-endian slice is
 * written unchanged; this function performs no byte swap itself.
 *
 * NOTE(review): source line 2220 (trailing return type and opening brace) is
 * absent from this listing.
 *
 * @tparam WI writer implementation wrapped by io::Writer
 * @param writer destination of the native-endian runes
 * @param str big-endian UTF-32 input
 * @param writeBom when true, endian_bom<std::endian::native, char32_t> is
 *        written first
 * @return success(written), where written counts successful write() calls
 *         (BOM included); the writer's error on the first failed write
 */
2218 template<WriterImpl WI>
2219 auto utf32be_to_utf32(io::Writer<WI> &writer, const SliceConstU32Be &str, bool writeBom = false)
2221 auto runeIter = utf32_rune_iterator(str);
2222 char32_t rune;
2223 size_t written = 0;
2224
// With target == native, endian_bom evaluates to the unflipped `bom`.
2225 if (writeBom) {
2226 if (auto res = writer.write(endian_bom<std::endian::native, char32_t>); res.is_error()) {
2227 return res.error();
2228 }
2229 ++written;
2230 }
2231
// Any big-endian handling of the input happens inside the rune iterator
// (not visible here).
2232 while (runeIter.next().copy_if_present(rune)) {
2233 if (auto writeRes = writer.write(rune); writeRes.is_error()) {
2234 return writeRes.error();
2235 }
2236 ++written;
2237 }
2238 return success(written);
2239 }
2240
2241} // namespace mtcore
2242
2243#endif // MTSTD_ENCODINGS_HPP
ValIter< T > val(const T &r)
Generic value iterator that uses the operator[] and incrementing indexes to iterate over a collection...
Definition iter.hpp:114
constexpr auto nullopt
Placeholder value for an empty Optional.
Definition optional.hpp:409
ConstPtrIter< T > const_ptr(const T &r)
Generic constant pointer iterator that uses the operator[] and incrementing indexes to iterate over a...
Definition iter.hpp:128
PtrIter< T > ptr(T &r)
Generic pointer iterator that uses the operator[] and incrementing indexes to iterate over a collecti...
Definition iter.hpp:101
constexpr char32_t flip_endian_ch32(const char32_t ch)
Flips the endianness of a char32.
constexpr char16_t flip_endian_ch16(const char16_t ch)
Flips the endianness of a char16.
constexpr auto opposite
endian that is opposite of machine (opposite of std::endian::native)
constexpr auto flip_endian(const T bytes)
Flips the endian of a u16, char16, u32, or char32.
constexpr auto machine
endian of machine (alias for std::endian::native)
RuneConversionError
Errors that can occur when doing conversions to a rune.
SliceWriteError
Errors when writing to a slice.
#define ensure(check,...)
Ensures that a check holds true; aborts the program if it does not. Will print error if the condition is ...
Success< void > success()
Creates a successful void Result object.
Definition result.hpp:398
uint32_t u32
Alias for 32-bit unsigned ints.
constexpr auto is_bom(const char32_t ch)
Checks if a character is a BOM in native ordering.
EndianSlice< const char16_t, std::endian::big > SliceConstU16Be
Slice type representing const big endian UTF-16 string.
auto utf32_to_utf32le(io::Writer< WI > &writer, const SliceConstU32Native &str, bool writeBom=false) -> Result< size_t, typename io::Writer< WI >::ErrType >
Converts UTF-32 Native Endian to UTF-32 Little Endian.
auto utf32le_to_utf32(io::Writer< WI > &writer, const SliceConstU32Le &str, bool writeBom=false) -> Result< size_t, typename io::Writer< WI >::ErrType >
Converts UTF-32 Little Endian to UTF-32 Native Endian.
EndianSlice< const char32_t, std::endian::little > SliceConstU32Le
Slice type representing const little endian UTF-32 string.
Slice< char16_t > slice_from_native_endian(SliceU16Native s)
Gets a normal slice from a native endian slice.
RuneIterator< char8_t > utf8_rune_iterator(const Slice< const char8_t > &s)
Rune iterator for a UTF8 sequence.
auto utf16_to_utf16le(io::Writer< WI > &writer, const SliceConstU16Native &str, bool writeBom=false) -> Result< size_t, typename io::Writer< WI >::ErrType >
Converts native UTF-16 to UTF-16 Little Endian.
Optional< std::endian > u16_detect_endian(const char16_t ch)
Tries to detect the endian encoding based on a possible BOM mark.
EndianSlice< const char32_t, std::endian::big > SliceConstU32Be
Slice type representing const big endian UTF-32 string.
auto utf32le_to_utf32be(io::Writer< WI > &writer, const SliceConstU32Le &str, bool writeBom=false) -> Result< size_t, typename io::Writer< WI >::ErrType >
Converts UTF-32 Little Endian to UTF-32 Big Endian.
auto utf32_to_utf32be(io::Writer< WI > &writer, const SliceConstU32Native &str, bool writeBom=false) -> Result< size_t, typename io::Writer< WI >::ErrType >
Converts UTF-32 Native Endian to UTF-32 Big Endian.
SliceU16Le endian_from_le_slice(Slice< char16_t > s)
Gets a UTF-16 endian slice from a slice which is assumed to be little endian.
Slice< char16_t > slice_from_le_endian(SliceU16Native s)
Gets a normal slice from a little endian slice.
auto utf32be_to_utf32(io::Writer< WI > &writer, const SliceConstU32Be &str, bool writeBom=false) -> Result< size_t, typename io::Writer< WI >::ErrType >
Converts UTF-32 Big Endian to UTF-32 Native Endian.
EndianSlice< char16_t, std::endian::native > SliceU16Native
Slice type representing native UTF-16 string.
auto utf16_to_utf32(io::Writer< WI > &writer, const SliceConstU16Le &str, bool writeBom=false) -> Result< size_t, typename io::Writer< WI >::ErrType >
Will convert a UTF-16 string to UTF-32.
constexpr bool is_low_surrogate(char16_t ch)
Checks if UTF-16 character is a low surrogate.
auto utf8_to_utf32(io::Writer< WI > &writer, const Slice< const char8_t > &str, bool writeBom=false) -> Result< size_t, typename io::Writer< WI >::ErrType >
Will convert a UTF-8 string to UTF-32.
EndianSlice< const char16_t, std::endian::native > SliceConstU16Native
Slice type representing const native UTF-16 string.
Optional< std::endian > u32_detect_endian(const char32_t ch)
Tries to detect the endian encoding based on a possible BOM mark.
auto str_to_utf32(io::Writer< WI > &writer, const Slice< const char > &str, bool writeBom=false) -> Result< size_t, typename io::Writer< WI >::ErrType >
Will convert a UTF-8 string to UTF-32.
auto utf32_to_str(io::Writer< WI > &writer, const SliceConstU32Be &str) -> Result< size_t, typename io::Writer< WI >::ErrType >
Will convert a UTF-32 string to character sequence (UTF-8 encoded chars)
RuneIterator< char32_t > utf32_rune_iterator(const Slice< const char32_t > &s)
Rune iterator for a UTF32 sequence.
auto utf16be_to_utf16le(io::Writer< WI > &writer, const SliceConstU16Be &str, bool writeBom=false) -> Result< size_t, typename io::Writer< WI >::ErrType >
Converts UTF-16 Big Endian to UTF-16 Little Endian.
Slice< char16_t > slice_from_be_endian(SliceU16Native s)
Gets a normal slice from a big endian slice.
auto utf16be_to_utf16(io::Writer< WI > &writer, const SliceConstU16Be &str, bool writeBom=false) -> Result< size_t, typename io::Writer< WI >::ErrType >
Converts UTF-16 Big Endian to Native UTF-16.
constexpr char32_t bom
Native representation of Byte Order Mark (BOM)
auto utf16le_to_utf16be(io::Writer< WI > &writer, const SliceConstU16Le &str, bool writeBom=false) -> Result< size_t, typename io::Writer< WI >::ErrType >
Converts UTF-16 Little Endian to UTF-16 Big Endian.
auto utf32_to_utf8(io::Writer< WI > &writer, const SliceConstU32Be &str) -> Result< size_t, typename io::Writer< WI >::ErrType >
Will convert a UTF-32 string to UTF-8 string.
SliceU16Be endian_from_be_slice(Slice< char16_t > s)
Gets a UTF-16 endian slice from a slice which is assumed to be big endian.
EndianSlice< char16_t, std::endian::little > SliceU16Le
Slice type representing little endian UTF-16 string.
auto utf16_to_str(io::Writer< WI > &writer, const SliceConstU16Be &str) -> Result< size_t, typename io::Writer< WI >::ErrType >
Will convert a UTF-16 string to UTF-8.
auto utf32be_to_utf32le(io::Writer< WI > &writer, const SliceConstU32Be &str, bool writeBom=false) -> Result< size_t, typename io::Writer< WI >::ErrType >
Converts UTF-32 Big Endian to UTF-32 Little Endian.
constexpr bool is_high_surrogate(char16_t ch)
Checks if UTF-16 character is a high surrogate.
EndianSlice< char16_t, std::endian::big > SliceU16Be
Slice type representing big endian UTF-16 string.
auto utf32_to_utf16(io::Writer< WI > &writer, const SliceConstU32Be &str, bool writeBom=false) -> Result< size_t, typename io::Writer< WI >::ErrType >
Will convert a UTF-32 string to a UTF-16 string. Can optionally write a BOM as a prefix.
constexpr auto is_bom_rev_16_bit(const char16_t ch)
Checks if a 16-bit character is a BOM in reverse ordering.
auto utf16_to_utf16be(io::Writer< WI > &writer, const SliceConstU16Native &str, bool writeBom=false) -> Result< size_t, typename io::Writer< WI >::ErrType >
Converts native UTF-16 to UTF-16 Big Endian.
auto utf8_to_str(io::Writer< WI > &writer, const Slice< const char8_t > &str) -> Result< size_t, typename io::Writer< WI >::ErrType >
Will convert a UTF-8 string to character sequence.
RuneIterator< char16_t > utf16_rune_iterator(const Slice< const char16_t > &s)
Rune iterator for a UTF16 sequence.
RuneIterator< char > str_rune_iterator(const Slice< const char > &s)
Rune iterator for a slice of characters.
U8Seq rune_to_utf8(Rune rune)
Converts a UTF-32 Rune to a utf-8 sequence.
EndianSlice< char32_t, std::endian::little > SliceU32Le
Slice type representing little endian UTF-32 string.
constexpr auto is_bom_rev_32_bit(const char32_t ch)
Checks if a 32-bit character is a BOM in reverse ordering.
constexpr bool is_surrogate(char16_t ch)
Checks if UTF-16 character is a surrogate.
auto utf16le_to_utf16(io::Writer< WI > &writer, const SliceConstU16Le &str, bool writeBom=false) -> Result< size_t, typename io::Writer< WI >::ErrType >
Converts UTF-16 Little Endian to Native UTF-16.
EndianSlice< const char32_t, std::endian::native > SliceConstU32Native
Slice type representing const native UTF-32 string.
auto str_to_utf16(io::Writer< WI > &writer, const Slice< const char > &str, bool writeBom=false) -> Result< size_t, typename io::Writer< WI >::ErrType >
Will convert a UTF-8 string to UTF-16.
U16Seq rune_to_utf16(Rune rune)
Converts a UTF-32 Rune to a utf-16 sequence.
constexpr auto oppositeEndian
Returns the opposite endianness of a given endian (default is opposite of the native endian)
CharSeq rune_to_str(Rune rune)
Converts a UTF-32 Rune to a sequence of char.
EndianSlice< char32_t, std::endian::native > SliceU32Native
Slice type representing native UTF-32 string.
EndianSlice< char32_t, std::endian::big > SliceU32Be
Slice type representing big endian UTF-32 string.
std::variant< SliceU16Le, SliceU16Be > endian_from_slice(Slice< char16_t > s)
Gets a UTF-16 endian slice from an input slice. Will try to use the first character of the slice as a ...
auto utf16_to_utf8(io::Writer< WI > &writer, const SliceConstU16Be &str) -> Result< size_t, typename io::Writer< WI >::ErrType >
Will convert a UTF-16 string to UTF-8.
SliceU16Native endian_from_native_slice(Slice< char16_t > s)
Gets a UTF-16 endian slice from a slice which is assumed to be native endian.
auto utf8_to_utf16(io::Writer< WI > &writer, const Slice< const char8_t > &str, bool writeBom=false) -> Result< size_t, typename io::Writer< WI >::ErrType >
Will convert a UTF-8 string to UTF-16.
auto str_to_utf8(io::Writer< WI > &writer, const Slice< const char > &str) -> Result< size_t, typename io::Writer< WI >::ErrType >
Will convert a character sequence to UTF-8 string.
EndianSlice< const char16_t, std::endian::little > SliceConstU16Le
Slice type representing const little endian UTF-16 string.
Generic iterator defaults built on common contracts. Does not guarantee performance of iterators. Actua...
Definition iter.hpp:91
constexpr auto byte
Definition storage.hpp:27
Core library for C++ with Zig-related functionality.
constexpr T endian_bom
char32_t Rune
Result< Rune, RuneConversionError > utf8_to_rune(const Slice< const char8_t > &rune)
UTF-8 to a Rune.
impl::ShortSeq< char, 4 > CharSeq
Result< Rune, RuneConversionError > utf16_to_rune(const Slice< const char16_t > &rune)
UTF-16 to a Rune.
Result< Rune, RuneConversionError > str_to_rune(const Slice< const char > &rune)
Characters to a Rune.
impl::ShortSeq< char16_t, 2 > U16Seq
impl::ShortSeq< char8_t, 4 > U8Seq
Endian Wrapper for grabbing the endian value. By default, will handle converting to and from the nativ...
constexpr T as_endian(std::endian viewAs) const
Gets the value with the specific endian.
Endian Wrapper for grabbing the endian value. By default, will handle converting to and from the nativ...
constexpr EndianWrapper & operator=(T val)
Sets the pointed at value using the native endian.
constexpr EndianWrapper & with_endian(T newVal, std::endian setAs)
Sets the target value with the provided endian interpretation.
constexpr T as_endian(std::endian viewAs) const
Gets the value with the specific endian.
A Slice with Endian-aware data which is just a pointer + length + endian direction. Accessing elements...
bool operator>=(const EndianSlice< std::add_const_t< T >, E > &o) const noexcept
bool operator>(const EndianSlice< std::remove_const_t< T >, E > &o) const noexcept
constexpr ConstEndianWrapper at(size_t i) const noexcept
Access element at a specific index. Const operator, returns a const reference.
EndianWrapper operator[](size_t i) noexcept
Access element at a specific index. Non-const operator, returns a mutable reference.
constexpr void init(T *head, size_t len)
Initializes a Slice. Using init instead of a constructor so that slices allocated with malloc or an ar...
bool operator<=(const EndianSlice< std::remove_const_t< T >, E > &o) const noexcept
constexpr EndianSlice sub(size_t start) const noexcept
Gets a sub Slice from start to end.
bool operator<(const EndianSlice< std::remove_const_t< T >, E > &o) const noexcept
bool operator!=(const EndianSlice< std::add_const_t< T >, E > &o) const noexcept
bool operator>=(const EndianSlice< std::remove_const_t< T >, E > &o) const noexcept
EndianWrapper at(size_t i) noexcept
Access element at a specific index. Non-const operator, returns a mutable reference.
constexpr Slice< std::add_const_t< T > > to_const() const noexcept
Converts to a const Slice.
constexpr bool empty() const noexcept
Checks if a Slice is empty.
decltype(auto) ptr_iter() const noexcept
decltype(auto) ptr_iter() noexcept
bool operator!=(const EndianSlice< std::remove_const_t< T >, E > &o) const noexcept
bool operator<=(const EndianSlice< std::add_const_t< T >, E > &o) const noexcept
bool operator==(const EndianSlice< std::remove_const_t< T >, E > &o) const noexcept
std::strong_ordering operator<=>(const EndianSlice< std::remove_const_t< T >, E > &other) const noexcept
Compares against another Slice.
decltype(auto) iter() const noexcept
constexpr ConstEndianWrapper operator[](size_t i) const noexcept
Access element at a specific index. Const operator, returns a const reference.
bool operator>(const EndianSlice< std::add_const_t< T >, E > &o) const noexcept
constexpr EndianSlice sub(size_t start, size_t len) const noexcept
Gets a sub Slice from start up to a length.
constexpr size_t size() const noexcept
Gets the size of a Slice.
std::strong_ordering operator<=>(const EndianSlice< std::add_const_t< T >, E > &other) const noexcept
Compares against another Slice.
bool operator==(const EndianSlice< std::add_const_t< T >, E > &o) const noexcept
bool operator<(const EndianSlice< std::add_const_t< T >, E > &o) const noexcept
Represents a value that may or may not exist (an "Optional" value). Similar concept to std::optional,...
Definition optional.hpp:235
Represents a Result that may have an error (error code) or a success value. A type of "void" means the...
Definition result.hpp:170
Optional< char32_t > next()
Slice< const char16_t > data
Slice< const char32_t > data
Optional< char32_t > next()
Iterates over runes of utf8 sequences (char, char8_t)
Slice< const T > data
Optional< Rune > next()
Gets the next Rune, if present.
A Slice which is just a pointer + length. Accessing elements through the array operator will do bounds...
constexpr Slice sub(size_t start) const noexcept
Gets a sub Slice from start.
constexpr size_t size() const noexcept
Gets the size of a Slice.
A writer that writes data to some sort of stream or buffer. Note: the data elements written should be ...
Definition io/writer.hpp:51