MT Core (C++)
Core library for replacing C++ standard in project usage
Loading...
Searching...
No Matches
byte_endian.hpp
Go to the documentation of this file.
1/*
2
3Copyright 2025 Matthew Tolman
4
5Licensed under the Apache License, Version 2.0 (the "License");
6you may not use this file except in compliance with the License.
7You may obtain a copy of the License at
8
9 http://www.apache.org/licenses/LICENSE-2.0
10
11Unless required by applicable law or agreed to in writing, software
12distributed under the License is distributed on an "AS IS" BASIS,
13WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14See the License for the specific language governing permissions and
15limitations under the License.
16
17*/
18
19#ifndef MTSTD_ENCODINGS_HPP
20#define MTSTD_ENCODINGS_HPP
21
22#include "mtcore/byte_order.hpp"
24#include "mtcore/io/writer.hpp"
25#include <array>
26
31
34namespace mtcore {
/// Unicode byte order mark code point (U+FEFF) as read in the machine's own byte order.
constexpr char32_t bom = 0xFEFF;

/// Checks whether a code unit is the byte order mark read in the machine's own byte order.
constexpr auto is_bom(const char32_t ch) { return bom == ch; }

/// Checks whether a 16-bit code unit is the byte order mark with its two bytes swapped (0xFFFE).
constexpr auto is_bom_rev_16_bit(const char16_t ch) { return 0xFFFE == ch; }

/// Checks whether a 32-bit code unit is the byte order mark with its four bytes reversed (0xFFFE0000).
constexpr auto is_bom_rev_32_bit(const char32_t ch) { return 0xFFFE0000 == ch; }
56
57 template<std::endian target, typename T = char32_t>
58 constexpr T endian_bom = target == std::endian::native ? bom : flip_endian<T>(bom);
59
66 inline Optional<std::endian> u16_detect_endian(const char16_t ch) {
67 if (is_bom(ch)) {
68 return endian::machine;
69 }
70 else if (is_bom_rev_16_bit(ch)) {
71 return endian::opposite;
72 }
73 else {
74 return nullopt;
75 }
76 }
77
84 inline Optional<std::endian> u32_detect_endian(const char32_t ch) {
85 if (is_bom(ch)) {
86 return endian::machine;
87 }
88 else if (is_bom_rev_32_bit(ch)) {
89 return endian::opposite;
90 }
91 else {
92 return nullopt;
93 }
94 }
95
96 namespace impl {
103 template<typename T, size_t MaxLen>
104 struct ShortSeq {
105 using Elem = T;
106 int len;
107 std::array<Elem, MaxLen> elems;
108
109 [[nodiscard]] size_t size() const {
110 ensure(len < MaxLen);
111 return len;
112 }
113
114 T &operator[](size_t i) {
115 ensure(i <= MaxLen, "out of bounds");
116 ensure(static_cast<int>(i) < len, "out of bounds");
117 return elems[i];
118 }
119
120 const T &operator[](size_t i) const {
121 ensure(i <= MaxLen, "out of bounds");
122 ensure(static_cast<int>(i) < len, "out of bounds");
123 return elems[i];
124 }
125
126 [[nodiscard]] T &at(size_t i) noexcept { return (*this)[i]; }
127 [[nodiscard]] constexpr const T &at(size_t i) const noexcept { return (*this)[i]; }
128
129 [[nodiscard]] decltype(auto) ptr_iter() noexcept { return iter::ptr(*this); }
130 [[nodiscard]] decltype(auto) ptr_iter() const noexcept { return iter::const_ptr(*this); }
131 [[nodiscard]] decltype(auto) iter() const noexcept { return iter::val(*this); }
132 };
133 } // namespace impl
134
135 using CharSeq = impl::ShortSeq<char, 4>;
136 using U8Seq = impl::ShortSeq<char8_t, 4>;
137 using U16Seq = impl::ShortSeq<char16_t, 2>;
138
/// True when `ch` is any UTF-16 surrogate code unit (0xD800..0xDFFF), i.e. its top five bits are 11011.
constexpr bool is_surrogate(char16_t ch) { return (ch & 0xF800) == 0xD800; }
/// True when `ch` is a UTF-16 high (leading) surrogate (0xD800..0xDBFF), i.e. its top six bits are 110110.
constexpr bool is_high_surrogate(char16_t ch) { return (ch & 0xFC00) == 0xD800; }
/// True when `ch` is a UTF-16 low (trailing) surrogate (0xDC00..0xDFFF), i.e. its top six bits are 110111.
constexpr bool is_low_surrogate(char16_t ch) { return (ch & 0xFC00) == 0xDC00; }
152
157 template<std::endian E = std::endian::native>
158 constexpr auto oppositeEndian = E == std::endian::big ? std::endian::little : std::endian::big;
159
172 template<class T, std::endian Endian = std::endian::native>
173 struct EndianSlice {
174 static_assert(std::is_same_v<T, const char16_t> || std::is_same_v<T, char16_t> ||
175 std::is_same_v<T, const char32_t> || std::is_same_v<T, char32_t>);
176 using Elem = T;
177
178 T *head = nullptr;
179 size_t len = 0;
180
181 [[nodiscard]] decltype(auto) ptr_iter() noexcept { return iter::ptr(*this); }
182 [[nodiscard]] decltype(auto) ptr_iter() const noexcept { return iter::const_ptr(*this); }
183 [[nodiscard]] decltype(auto) iter() const noexcept { return iter::val(*this); }
184
186 [[nodiscard]] constexpr Slice<std::add_const_t<T>> to_const() const noexcept {
187 return {static_cast<std::add_pointer_t<std::add_const_t<T>>>(head), len};
188 }
189
198 constexpr void init(T *head, size_t len) {
199 this->head = head;
200 this->len = len;
201 }
202
210 size_t i;
211
215 constexpr operator T() const { return as_endian(std::endian::native); }
216
217 [[nodiscard]] constexpr auto native() const { return as_endian(std::endian::native); }
218
223 [[nodiscard]] constexpr T as_endian(std::endian viewAs) const {
224 ensure(i < slice.size(), "SLICE ACCESS OUT OF BOUNDS");
225 ensure(slice.head, "NULL POINTER DEREFERENCE");
226 auto v = slice.head[i];
227 if (viewAs != Endian) {
228 return flip_endian(v);
229 }
230 return v;
231 }
232 };
233
241 size_t i;
242
246 constexpr operator T() const { return as_endian(std::endian::native); }
247
251 constexpr EndianWrapper &operator=(T val) { return with_endian(val, std::endian::native); }
252
253 [[nodiscard]] constexpr T native() const { return as_endian(std::endian::native); }
254
259 [[nodiscard]] constexpr T as_endian(std::endian viewAs) const {
260 ensure(i < slice.size(), "SLICE ACCESS OUT OF BOUNDS");
261 ensure(slice.head, "NULL POINTER DEREFERENCE");
262 auto v = slice.head[i];
263 if (viewAs != Endian) {
264 return flip_endian(v);
265 }
266 return v;
267 }
268
274 constexpr EndianWrapper &with_endian(T newVal, std::endian setAs) {
275 ensure(i < slice.size(), "SLICE ACCESS OUT OF BOUNDS");
276 ensure(slice.head, "NULL POINTER DEREFERENCE");
277 if (setAs != Endian) {
278 newVal = flip_endian(newVal);
279 }
280 slice.head[i] = newVal;
281 return *this;
282 }
283 };
284
290 [[nodiscard]] constexpr ConstEndianWrapper operator[](size_t i) const noexcept {
291 ensure(i < len, "SLICE ACCESS OUT OF BOUNDS");
292 ensure(head, "NULL POINTER DEREFERENCE");
293 return ConstEndianWrapper{*this, i};
294 }
295
301 EndianWrapper operator[](size_t i) noexcept {
302 ensure(i < len, "SLICE ACCESS OUT OF BOUNDS");
303 ensure(head, "NULL POINTER DEREFERENCE");
304 return EndianWrapper{*this, i};
305 }
306
312 EndianWrapper at(size_t i) noexcept { return (*this)[i]; }
313
319 [[nodiscard]] constexpr ConstEndianWrapper at(size_t i) const noexcept { return (*this)[i]; }
320
324 [[nodiscard]] constexpr size_t size() const noexcept { return head != nullptr ? len : 0; }
325
329 [[nodiscard]] constexpr bool empty() const noexcept { return head == nullptr || len == 0; }
330
335 [[nodiscard]] constexpr EndianSlice sub(size_t start) const noexcept {
336 EndianSlice res;
337 if (start >= len) {
338 res.init(nullptr, 0);
339 return res;
340 }
341
342 res.init(head + start, len - start);
343 ensure(res.len + start == len, "BAD LENGTH MATH");
344 ensure(head + len == res.head + res.len, "BAD END POINTER");
345 return res;
346 }
347
353 [[nodiscard]] constexpr EndianSlice sub(size_t start, size_t len) const noexcept {
354 auto res = sub(start);
355 res.len = res.len > len ? len : res.len;
356 ensure(res.len <= len, "BAD LENGTH");
357 return res;
358 }
359
365 template<std::endian E>
366 std::strong_ordering operator<=>(const EndianSlice<std::remove_const_t<T>, E> &other) const noexcept {
367 return *this <=> other.to_const();
368 }
369
375 template<std::endian E>
376 std::strong_ordering operator<=>(const EndianSlice<std::add_const_t<T>, E> &other) const noexcept {
377 if (!head) {
378 if (other.head) {
379 return std::strong_ordering::less;
380 }
381 return std::strong_ordering::equal;
382 }
383
384 for (size_t i = 0; i < len && i < other.len; ++i) {
385 auto tv = (*this)[i].native();
386 auto ov = other[i].native();
387 if (tv > ov) {
388 return std::strong_ordering::greater;
389 }
390 else if (tv < ov) {
391 return std::strong_ordering::less;
392 }
393 }
394
395 if (len == other.len) {
396 return std::strong_ordering::equal;
397 }
398 else if (len < other.len) {
399 return std::strong_ordering::less;
400 }
401 else {
402 return std::strong_ordering::greater;
403 }
404 }
405
406 template<std::endian E>
407 bool operator==(const EndianSlice<std::remove_const_t<T>, E> &o) const noexcept {
408 return (*this <=> o) == std::strong_ordering::equal;
409 }
410
411 template<std::endian E>
412 bool operator!=(const EndianSlice<std::remove_const_t<T>, E> &o) const noexcept {
413 return (*this <=> o) != std::strong_ordering::equal;
414 }
415
416 template<std::endian E>
417 bool operator<(const EndianSlice<std::remove_const_t<T>, E> &o) const noexcept {
418 return (*this <=> o) == std::strong_ordering::less;
419 }
420
421 template<std::endian E>
422 bool operator>(const EndianSlice<std::remove_const_t<T>, E> &o) const noexcept {
423 return (*this <=> o) == std::strong_ordering::greater;
424 }
425
426 template<std::endian E>
427 bool operator<=(const EndianSlice<std::remove_const_t<T>, E> &o) const noexcept {
428 const auto cmp = *this <=> o;
429 return (cmp == std::strong_ordering::less || cmp == std::strong_ordering::equal);
430 }
431
432 template<std::endian E>
433 bool operator>=(const EndianSlice<std::remove_const_t<T>, E> &o) const noexcept {
434 const auto cmp = *this <=> o;
435 return (cmp == std::strong_ordering::greater || cmp == std::strong_ordering::equal);
436 }
437
438 template<std::endian E>
439 bool operator==(const EndianSlice<std::add_const_t<T>, E> &o) const noexcept {
440 return (*this <=> o) == std::strong_ordering::equal;
441 }
442
443 template<std::endian E>
444 bool operator!=(const EndianSlice<std::add_const_t<T>, E> &o) const noexcept {
445 return (*this <=> o) != std::strong_ordering::equal;
446 }
447
448 template<std::endian E>
449 bool operator<(const EndianSlice<std::add_const_t<T>, E> &o) const noexcept {
450 return (*this <=> o) == std::strong_ordering::less;
451 }
452
453 template<std::endian E>
454 bool operator>(const EndianSlice<std::add_const_t<T>, E> &o) const noexcept {
455 return (*this <=> o) == std::strong_ordering::greater;
456 }
457
458 template<std::endian E>
459 bool operator<=(const EndianSlice<std::add_const_t<T>, E> &o) const noexcept {
460 const auto cmp = *this <=> o;
461 return (cmp == std::strong_ordering::less || cmp == std::strong_ordering::equal);
462 }
463
464 template<std::endian E>
465 bool operator>=(const EndianSlice<std::add_const_t<T>, E> &o) const noexcept {
466 const auto cmp = *this <=> o;
467 return (cmp == std::strong_ordering::greater || cmp == std::strong_ordering::equal);
468 }
469 };
470
471 namespace impl {
472 template<typename T, std::endian E>
473 struct EndianSliceWriterImpl {
474 static_assert(!std::is_const_v<T>, "Cannot write to const pointer!");
475 EndianSlice<T, E> out;
476 size_t curWriteIndex = 0;
477
478 using WriteElem = T;
479 using ErrType = io::SliceWriteError;
480
481 Result<size_t, ErrType> write(Slice<std::add_const_t<T>> bytes) {
482 size_t i = 0;
483 for (; i < bytes.size() && curWriteIndex < out.size(); ++i, ++curWriteIndex) {
484 out[curWriteIndex] = bytes[i];
485 }
486 return success(i);
487 }
488
489 template<std::endian E2>
490 Result<size_t, ErrType> write(EndianSlice<std::add_const_t<T>, E2> bytes) {
491 size_t i = 0;
492 for (; i < bytes.size() && curWriteIndex < out.size(); ++i, ++curWriteIndex) {
493 out[curWriteIndex] = bytes[i].native();
494 }
495 return success(i);
496 }
497 };
498 } // namespace impl
499
524
553
558 inline SliceU16Native endian_from_native_slice(Slice<char16_t> s) { return {.head = s.head, .len = s.len}; }
563 inline SliceU32Native endian_from_native_slice(Slice<char32_t> s) { return {.head = s.head, .len = s.len}; }
564
569 inline SliceU16Le endian_from_le_slice(Slice<char16_t> s) { return {.head = s.head, .len = s.len}; }
574 inline SliceU32Le endian_from_le_slice(Slice<char32_t> s) { return {.head = s.head, .len = s.len}; }
575
580 inline SliceU16Be endian_from_be_slice(Slice<char16_t> s) { return {.head = s.head, .len = s.len}; }
585 inline SliceU32Be endian_from_be_slice(Slice<char32_t> s) { return {.head = s.head, .len = s.len}; }
586
591 inline SliceConstU16Native endian_from_native_slice(Slice<const char16_t> s) {
592 return {.head = s.head, .len = s.len};
593 }
594 /*
595 * Gets a const UTF-32 endian slice from a slice which is assumed to be native endian
596 * @ingroup unicode_enc
597 */
599 return {.head = s.head, .len = s.len};
600 }
601
606 inline SliceConstU16Le endian_from_le_slice(Slice<const char16_t> s) { return {.head = s.head, .len = s.len}; }
611 inline SliceConstU32Le endian_from_le_slice(Slice<const char32_t> s) { return {.head = s.head, .len = s.len}; }
612
617 inline SliceConstU16Be endian_from_be_slice(Slice<const char16_t> s) { return {.head = s.head, .len = s.len}; }
622 inline SliceConstU32Be endian_from_be_slice(Slice<const char32_t> s) { return {.head = s.head, .len = s.len}; }
623
632 inline std::variant<SliceU16Le, SliceU16Be> endian_from_slice(Slice<char16_t> s) {
633 if (s.size() > 0) {
634 if (auto detect = u16_detect_endian(s[0]); detect.has_value()) {
635 auto subbed = s.sub(1);
636 if (detect.value() == std::endian::little) {
637 return SliceU16Le{.head = subbed.head, .len = subbed.len};
638 }
639 else {
640 return SliceU16Be{.head = subbed.head, .len = subbed.len};
641 }
642 }
643 }
644 return SliceU16Native{.head = s.head, .len = s.len};
645 }
646
655 inline std::variant<SliceU32Le, SliceU32Be> endian_from_slice(Slice<char32_t> s) {
656 if (s.size() > 0) {
657 if (auto detect = u16_detect_endian(s[0]); detect.has_value()) {
658 auto subbed = s.sub(1);
659 if (detect.value() == std::endian::little) {
660 return SliceU32Le{.head = subbed.head, .len = subbed.len};
661 }
662 else {
663 return SliceU32Be{.head = subbed.head, .len = subbed.len};
664 }
665 }
666 }
667 return SliceU32Native{.head = s.head, .len = s.len};
668 }
669
678 inline std::variant<SliceConstU16Le, SliceConstU16Be> endian_from_slice(Slice<const char16_t> s) {
679 if (s.size() > 0) {
680 if (auto detect = u16_detect_endian(s[0]); detect.has_value()) {
681 auto subbed = s.sub(1);
682 if (detect.value() == std::endian::little) {
683 return SliceConstU16Le{.head = subbed.head, .len = subbed.len};
684 }
685 else {
686 return SliceConstU16Be{.head = subbed.head, .len = subbed.len};
687 }
688 }
689 }
690 return SliceConstU16Native{.head = s.head, .len = s.len};
691 }
692
701 inline std::variant<SliceConstU32Le, SliceConstU32Be> endian_from_slice(Slice<const char32_t> s) {
702 if (s.size() > 0) {
703 if (auto detect = u16_detect_endian(s[0]); detect.has_value()) {
704 auto subbed = s.sub(1);
705 if (detect.value() == std::endian::little) {
706 return SliceConstU32Le{.head = subbed.head, .len = subbed.len};
707 }
708 else {
709 return SliceConstU32Be{.head = subbed.head, .len = subbed.len};
710 }
711 }
712 }
713 return SliceConstU32Native{.head = s.head, .len = s.len};
714 }
715
720 inline Slice<char16_t> slice_from_native_endian(SliceU16Native s) { return {.head = s.head, .len = s.len}; }
726 return {.head = s.head, .len = s.len};
727 }
728
732 inline Slice<char32_t> slice_from_native_endian(SliceU32Native s) { return {.head = s.head, .len = s.len}; }
738 return {.head = s.head, .len = s.len};
739 }
740
745 inline Slice<char16_t> slice_from_le_endian(SliceU16Native s) { return {.head = s.head, .len = s.len}; }
750 inline Slice<const char16_t> slice_from_le_endian(SliceConstU16Native s) { return {.head = s.head, .len = s.len}; }
755 inline Slice<char32_t> slice_from_le_endian(SliceU32Native s) { return {.head = s.head, .len = s.len}; }
760 inline Slice<const char32_t> slice_from_le_endian(SliceConstU32Native s) { return {.head = s.head, .len = s.len}; }
761
766 inline Slice<char16_t> slice_from_be_endian(SliceU16Native s) { return {.head = s.head, .len = s.len}; }
771 inline Slice<const char16_t> slice_from_be_endian(SliceConstU16Native s) { return {.head = s.head, .len = s.len}; }
776 inline Slice<char32_t> slice_from_be_endian(SliceU32Native s) { return {.head = s.head, .len = s.len}; }
781 inline Slice<const char32_t> slice_from_be_endian(SliceConstU32Native s) { return {.head = s.head, .len = s.len}; }
782
783 using Rune = char32_t;
784
800
809
810 namespace impl::unicode {
811 constexpr char16_t utf16HighOffset = 0xD800;
812 constexpr char16_t utf16LowOffset = 0xDC00;
813 constexpr char32_t utf16Sub = 0x10000;
814
815 constexpr uint8_t continueFlag = 0b10000000;
816
817 constexpr auto runeMasks = std::array{
818 static_cast<u32>(0b111110000000000000000),
819 static_cast<u32>(0b000001111100000000000),
820 static_cast<u32>(0b000000000011110000000),
821 };
822
823 constexpr auto utf8StartFlags = std::array<std::tuple<uint8_t, int, uint8_t>, 5>{
824 std::make_tuple(0b11110000, 4, 0b00000111), std::make_tuple(0b11100000, 3, 0b00001111),
825 std::make_tuple(0b11000000, 2, 0b00011111), std::make_tuple(continueFlag, -1, 0b00111111),
826 std::make_tuple(0b00000000, 1, 0b01111111),
827 };
828
829 constexpr auto num_bytes(char8_t byte) -> int {
830 for (const auto &[flagSet, bytes, _flag_removal]: utf8StartFlags) {
831 if (byte >= flagSet) {
832 return bytes;
833 }
834 }
835 return -1;
836 }
837
838 constexpr auto without_flag(char8_t byte) -> uint8_t {
839 for (const auto &[flagSet, _bytes, removeFlag]: utf8StartFlags) {
840 if (byte >= flagSet) {
841 return byte & removeFlag;
842 }
843 }
844 return byte;
845 }
846
847 constexpr auto get_flag(char8_t byte) -> uint8_t {
848 for (const auto &[flagSet, _bytes, _removeFlag]: utf8StartFlags) {
849 if (byte >= flagSet) {
850 return flagSet;
851 }
852 }
853 return 0;
854 }
855 } // namespace impl::unicode
856
861
866
871 template<typename T>
873 using IterElem = Rune;
875 size_t strIndex = 0;
876
879 if (strIndex >= data.size()) {
880 return nullopt;
881 }
882 auto curIndex = strIndex;
883 char32_t currentRune = 0;
884 auto numBytes = impl::unicode::num_bytes(data[curIndex]);
885
886 if (numBytes < 0) {
887 ++strIndex;
888 }
889 else if (curIndex + numBytes - 1 >= data.size()) {
890 strIndex = data.size();
891 }
892 else {
893 strIndex = curIndex + numBytes;
894 for (size_t index = curIndex; index < strIndex; ++index) {
895 auto flag = impl::unicode::get_flag(data[index]);
896 if (index > curIndex && flag != impl::unicode::continueFlag) {
897 strIndex = curIndex + index - 1;
898 currentRune = 0;
899 break;
900 }
901 auto noFlag = impl::unicode::without_flag(data[index]);
902 currentRune <<= 6;
903 currentRune |= noFlag;
904 }
905 }
906 return currentRune;
907 }
908 };
909
914 template<>
915 struct RuneIterator<char32_t> {
916 using IterElem = char32_t;
918 size_t strIndex = 0;
919 bool reverseBom = false;
920
922 if (strIndex >= data.len) {
923 return nullopt;
924 }
925 const auto res = data[strIndex++];
926 if (reverseBom) {
927 return flip_endian_ch32(res);
928 }
929 return res;
930 }
931 };
932
938 template<>
939 struct RuneIterator<char16_t> {
940 using IterElem = char32_t;
942 size_t strIndex = 0;
943 bool reverseBom = false;
944
946 if (strIndex >= data.size()) {
947 return nullopt;
948 }
949 size_t curIndex = strIndex;
950 char32_t currentRune = 0;
951
952 auto ch = data[curIndex];
953 if (reverseBom) {
954 ch = flip_endian_ch16(ch);
955 }
956
957 if (is_high_surrogate(ch)) {
958 if (curIndex + 1 >= data.size()) {
959 strIndex = data.size();
960 return currentRune;
961 }
962 else {
963 auto chNext = data[curIndex + 1];
964 if (reverseBom) {
965 chNext = flip_endian_ch16(chNext);
966 }
967
968 const auto high = (ch - impl::unicode::utf16HighOffset) * 0x400;
969 const auto low = chNext - impl::unicode::utf16LowOffset;
970 currentRune = high + low + impl::unicode::utf16Sub;
971 strIndex = curIndex + 2;
972 }
973 }
974 else {
975 if (!is_low_surrogate(ch)) {
976 currentRune = static_cast<char32_t>(ch);
977 if (reverseBom) {
978 flip_endian_ch16(currentRune);
979 }
980 }
981 ++strIndex;
982 }
983 return currentRune;
984 }
985 };
986
991 inline RuneIterator<char> str_rune_iterator(const Slice<const char> &s) { return {.data = s, .strIndex = 0}; }
992
997 inline RuneIterator<char8_t> utf8_rune_iterator(const Slice<const char8_t> &s) { return {.data = s, .strIndex = 0}; }
998
1004 if (s.size()) {
1005 if (const auto encoding = u16_detect_endian(s[0]); encoding.has_value()) {
1006 return {.data = s.sub(1), .strIndex = 0, .reverseBom = encoding == endian::opposite};
1007 }
1008 }
1009 return {.data = s, .strIndex = 0};
1010 }
1011
1017 return {
1018 .data = {.head = s.head, .len = s.len}, .strIndex = 0, .reverseBom = std::endian::native != std::endian::little};
1019 }
1020
1026 return {
1027 .data = {.head = s.head, .len = s.len}, .strIndex = 0, .reverseBom = std::endian::native != std::endian::big};
1028 }
1029
1035 return {
1036 .data = {.head = s.head, .len = s.len}, .strIndex = 0, .reverseBom = std::endian::native != std::endian::little};
1037 }
1038
1044 return {
1045 .data = {.head = s.head, .len = s.len}, .strIndex = 0, .reverseBom = std::endian::native != std::endian::big};
1046 }
1047
1053 if (s.size()) {
1054 if (const auto encoding = u32_detect_endian(s[0]); encoding.has_value()) {
1055 return {.data = s.sub(1), .strIndex = 0, .reverseBom = encoding == endian::opposite};
1056 }
1057 }
1058 return {.data = s, .strIndex = 0};
1059 }
1060
1066 return {
1067 .data = {.head = s.head, .len = s.len}, .strIndex = 0, .reverseBom = std::endian::native != std::endian::little};
1068 }
1069
1075 return {
1076 .data = {.head = s.head, .len = s.len}, .strIndex = 0, .reverseBom = std::endian::native != std::endian::big};
1077 }
1078
1084 return {
1085 .data = {.head = s.head, .len = s.len}, .strIndex = 0, .reverseBom = std::endian::native != std::endian::little};
1086 }
1087
1093 return {
1094 .data = {.head = s.head, .len = s.len}, .strIndex = 0, .reverseBom = std::endian::native != std::endian::big};
1095 }
1096
1107 template<WriterImpl WI>
1108 auto utf32_to_utf16(io::Writer<WI> &writer, const SliceConstU32Be &str, bool writeBom = false)
1110 char32_t cur;
1111 auto iter = utf32_rune_iterator(str);
1112 size_t written = 0;
1113
1114 if (writeBom) {
1115 if (auto res = writer.write(bom); res.is_error()) {
1116 return res.error();
1117 }
1118 ++written;
1119 }
1120
1121 while (iter.next().copy_if_present(cur)) {
1122 auto seq = rune_to_utf16(cur);
1123 char16_t curCh;
1124 auto iter2 = seq.iter();
1125 while (iter2.next().copy_if_present(curCh)) {
1126 if (auto writeRes = writer.write(curCh); writeRes.is_error()) {
1127 return writeRes.error();
1128 }
1129 ++written;
1130 }
1131 }
1132 return success(written);
1133 }
1134
1145 template<WriterImpl WI>
1146 auto utf32_to_utf16(io::Writer<WI> &writer, const SliceConstU32Le &str, bool writeBom = false)
1148 char32_t cur;
1149 auto iter = utf32_rune_iterator(str);
1150 size_t written = 0;
1151
1152 if (writeBom) {
1153 if (auto res = writer.write(bom); res.is_error()) {
1154 return res.error();
1155 }
1156 ++written;
1157 }
1158
1159 while (iter.next().copy_if_present(cur)) {
1160 auto seq = rune_to_utf16(cur);
1161 char16_t curCh;
1162 auto iter2 = seq.iter();
1163 while (iter2.next().copy_if_present(curCh)) {
1164 if (auto writeRes = writer.write(curCh); writeRes.is_error()) {
1165 return writeRes.error();
1166 }
1167 ++written;
1168 }
1169 }
1170 return success(written);
1171 }
1172
1183 template<WriterImpl WI>
1184 auto utf32_to_utf16(io::Writer<WI> &writer, const Slice<const char32_t> &str, bool writeBom = false)
1186 return utf32_to_utf16(writer, endian_from_native_slice(str), writeBom);
1187 }
1188
1197 template<WriterImpl WI>
1199 char32_t cur;
1200 auto iter = utf32_rune_iterator(str);
1201 size_t written = 0;
1202 while (iter.next().copy_if_present(cur)) {
1203 auto seq = rune_to_utf8(cur);
1204 char8_t curCh;
1205 auto iter2 = seq.iter();
1206 while (iter2.next().copy_if_present(curCh)) {
1207 if (auto writeRes = writer.write(curCh); writeRes.is_error()) {
1208 return writeRes.error();
1209 }
1210 ++written;
1211 }
1212 }
1213 return success(written);
1214 }
1215
1224 template<WriterImpl WI>
1226 char32_t cur;
1227 auto iter = utf32_rune_iterator(str);
1228 size_t written = 0;
1229 while (iter.next().copy_if_present(cur)) {
1230 auto seq = rune_to_utf8(cur);
1231 char8_t curCh;
1232 auto iter2 = seq.iter();
1233 while (iter2.next().copy_if_present(curCh)) {
1234 if (auto writeRes = writer.write(curCh); writeRes.is_error()) {
1235 return writeRes.error();
1236 }
1237 ++written;
1238 }
1239 }
1240 return success(written);
1241 }
1242
1251 template<WriterImpl WI>
1256
1265 template<WriterImpl WI>
1267 char32_t cur;
1268 auto iter = utf32_rune_iterator(str);
1269 size_t written = 0;
1270 while (iter.next().copy_if_present(cur)) {
1271 auto seq = rune_to_str(cur);
1272 char curCh;
1273 auto iter2 = seq.iter();
1274 while (iter2.next().copy_if_present(curCh)) {
1275 if (auto writeRes = writer.write(curCh); writeRes.is_error()) {
1276 return writeRes.error();
1277 }
1278 ++written;
1279 }
1280 }
1281 return success(written);
1282 }
1283
1292 template<WriterImpl WI>
1294 char32_t cur;
1295 auto iter = utf32_rune_iterator(str);
1296 size_t written = 0;
1297 while (iter.next().copy_if_present(cur)) {
1298 auto seq = rune_to_str(cur);
1299 char curCh;
1300 auto iter2 = seq.iter();
1301 while (iter2.next().copy_if_present(curCh)) {
1302 if (auto writeRes = writer.write(curCh); writeRes.is_error()) {
1303 return writeRes.error();
1304 }
1305 ++written;
1306 }
1307 }
1308 return success(written);
1309 }
1310
1319 template<WriterImpl WI>
1324
1334 template<WriterImpl WI>
1335 auto utf8_to_utf32(io::Writer<WI> &writer, const Slice<const char8_t> &str, bool writeBom = false)
1337 auto runeIter = utf8_rune_iterator(str);
1338 char32_t rune;
1339 size_t written = 0;
1340
1341 if (writeBom) {
1342 if (auto res = writer.write(bom); res.is_error()) {
1343 return res.error();
1344 }
1345 ++written;
1346 }
1347
1348 while (runeIter.next().copy_if_present(rune)) {
1349 if (auto writeRes = writer.write(rune); writeRes.is_error()) {
1350 return writeRes.error();
1351 }
1352 ++written;
1353 }
1354 return success(written);
1355 }
1356
1366 template<WriterImpl WI>
1367 auto str_to_utf32(io::Writer<WI> &writer, const Slice<const char> &str, bool writeBom = false)
1369 auto runeIter = str_rune_iterator(str);
1370 char32_t rune;
1371 size_t written = 0;
1372
1373 if (writeBom) {
1374 if (auto res = writer.write(bom); res.is_error()) {
1375 return res.error();
1376 }
1377 ++written;
1378 }
1379
1380 while (runeIter.next().copy_if_present(rune)) {
1381 if (auto writeRes = writer.write(rune); writeRes.is_error()) {
1382 return writeRes.error();
1383 }
1384 ++written;
1385 }
1386 return success(written);
1387 }
1388
1397 template<WriterImpl WI>
1399 auto runeIter = utf16_rune_iterator(str);
1400 char32_t rune;
1401 size_t written = 0;
1402 while (runeIter.next().copy_if_present(rune)) {
1403 auto seq = rune_to_utf8(rune);
1404 char8_t curCh;
1405 auto charIter = seq.iter();
1406 while (charIter.next().copy_if_present(curCh)) {
1407 if (auto writeRes = writer.write(curCh); writeRes.is_error()) {
1408 return writeRes.error();
1409 }
1410 ++written;
1411 }
1412 }
1413 return success(written);
1414 }
1415
1416
1426 template<WriterImpl WI>
1427 auto utf16_to_utf32(io::Writer<WI> &writer, const SliceConstU16Le &str, bool writeBom = false)
1429 auto runeIter = utf16_rune_iterator(str);
1430 char32_t rune;
1431 size_t written = 0;
1432
1433 if (writeBom) {
1434 if (auto res = writer.write(bom); res.is_error()) {
1435 return res.error();
1436 }
1437 ++written;
1438 }
1439
1440 while (runeIter.next().copy_if_present(rune)) {
1441 if (auto writeres = writer.write(rune); writeres.is_error()) {
1442 return writeres.error();
1443 }
1444 ++written;
1445 }
1446 return success(written);
1447 }
1448
1458 template<WriterImpl WI>
1459 auto utf16_to_utf32(io::Writer<WI> &writer, const SliceConstU16Be &str, bool writeBom = false)
1461 auto runeIter = utf16_rune_iterator(str);
1462 char32_t rune;
1463 size_t written = 0;
1464
1465 if (writeBom) {
1466 if (auto res = writer.write(bom); res.is_error()) {
1467 return res.error();
1468 }
1469 ++written;
1470 }
1471
1472 while (runeIter.next().copy_if_present(rune)) {
1473 if (auto writeres = writer.write(rune); writeres.is_error()) {
1474 return writeres.error();
1475 }
1476 ++written;
1477 }
1478 return success(written);
1479 }
1480
1490 template<WriterImpl WI>
1491 auto utf16_to_utf32(io::Writer<WI> &writer, const Slice<const char16_t> &str, bool writeBom = false)
1493 return utf16_to_utf32(writer, endian_from_native_slice(str), writeBom);
1494 }
1495
1504 template<WriterImpl WI>
1506 auto runeIter = utf16_rune_iterator(str);
1507 char32_t rune;
1508 size_t written = 0;
1509 while (runeIter.next().copy_if_present(rune)) {
1510 auto seq = rune_to_utf8(rune);
1511 char8_t curCh;
1512 auto charIter = seq.iter();
1513 while (charIter.next().copy_if_present(curCh)) {
1514 if (auto writeRes = writer.write(curCh); writeRes.is_error()) {
1515 return writeRes.error();
1516 }
1517 ++written;
1518 }
1519 }
1520 return success(written);
1521 }
1522
1531 template<WriterImpl WI>
1533 auto runeIter = utf16_rune_iterator(str);
1534 char32_t rune;
1535 size_t written = 0;
1536 while (runeIter.next().copy_if_present(rune)) {
1537 auto seq = rune_to_str(rune);
1538 char curCh;
1539 auto charIter = seq.iter();
1540 while (charIter.next().copy_if_present(curCh)) {
1541 if (auto writeRes = writer.write(curCh); writeRes.is_error()) {
1542 return writeRes.error();
1543 }
1544 ++written;
1545 }
1546 }
1547 return success(written);
1548 }
1549
1558 template<WriterImpl WI>
1560 auto runeIter = utf16_rune_iterator(str);
1561 char32_t rune;
1562 size_t written = 0;
1563 while (runeIter.next().copy_if_present(rune)) {
1564 auto seq = rune_to_str(rune);
1565 char curCh;
1566 auto charIter = seq.iter();
1567 while (charIter.next().copy_if_present(curCh)) {
1568 if (auto writeRes = writer.write(curCh); writeRes.is_error()) {
1569 return writeRes.error();
1570 }
1571 ++written;
1572 }
1573 }
1574 return success(written);
1575 }
1576
1585 template<WriterImpl WI>
1590
1599 template<WriterImpl WI>
1604
1614 template<WriterImpl WI>
1615 auto utf8_to_utf16(io::Writer<WI> &writer, const Slice<const char8_t> &str, bool writeBom = false)
1617 auto runeIter = utf8_rune_iterator(str);
1618 char32_t rune;
1619 size_t written = 0;
1620
1621 if (writeBom) {
1622 if (auto res = writer.write(bom); res.is_error()) {
1623 return res.error();
1624 }
1625 ++written;
1626 }
1627
1628 while (runeIter.next().copy_if_present(rune)) {
1629 auto seq = rune_to_utf16(rune);
1630 char16_t curCh;
1631 auto charIter = seq.iter();
1632 while (charIter.next().copy_if_present(curCh)) {
1633 if (auto writeRes = writer.write(curCh); writeRes.is_error()) {
1634 return writeRes.error();
1635 }
1636 ++written;
1637 }
1638 }
1639 return success(written);
1640 }
1641
1651 template<WriterImpl WI>
1652 auto str_to_utf16(io::Writer<WI> &writer, const Slice<const char> &str, bool writeBom = false)
1654 auto runeIter = str_rune_iterator(str);
1655 char32_t rune;
1656 size_t written = 0;
1657
1658 if (writeBom) {
1659 if (auto res = writer.write(bom); res.is_error()) {
1660 return res.error();
1661 }
1662 ++written;
1663 }
1664
1665 while (runeIter.next().copy_if_present(rune)) {
1666 auto seq = rune_to_utf16(rune);
1667 char16_t curCh;
1668 auto charIter = seq.iter();
1669 while (charIter.next().copy_if_present(curCh)) {
1670 if (auto writeRes = writer.write(curCh); writeRes.is_error()) {
1671 return writeRes.error();
1672 }
1673 ++written;
1674 }
1675 }
1676 return success(written);
1677 }
1678
1687 template<WriterImpl WI>
1690 auto runeIter = utf8_rune_iterator(str);
1691 char32_t rune;
1692 size_t written = 0;
1693 while (runeIter.next().copy_if_present(rune)) {
1694 auto seq = rune_to_str(rune);
1695 char curCh;
1696 auto charIter = seq.iter();
1697 while (charIter.next().copy_if_present(curCh)) {
1698 if (auto writeRes = writer.write(curCh); writeRes.is_error()) {
1699 return writeRes.error();
1700 }
1701 ++written;
1702 }
1703 }
1704 return success(written);
1705 }
1706
1715 template<WriterImpl WI>
1717 auto runeIter = str_rune_iterator(str);
1718 char32_t rune;
1719 size_t written = 0;
1720 while (runeIter.next().copy_if_present(rune)) {
1721 auto seq = rune_to_utf8(rune);
1722 char8_t curCh;
1723 auto charIter = seq.iter();
1724 while (charIter.next().copy_if_present(curCh)) {
1725 if (auto writeRes = writer.write(curCh); writeRes.is_error()) {
1726 return writeRes.error();
1727 }
1728 ++written;
1729 }
1730 }
1731 return success(written);
1732 }
1733
1743 template<WriterImpl WI>
1744 auto utf16_to_utf16le(io::Writer<WI> &writer, const SliceConstU16Native &str, bool writeBom = false)
1746 auto runeIter = utf16_rune_iterator(str);
1747 char32_t rune;
1748 size_t written = 0;
1749
1750 if (writeBom) {
1751 if (auto res = writer.write(endian_bom<std::endian::little, char16_t>); res.is_error()) {
1752 return res.error();
1753 }
1754 ++written;
1755 }
1756
1757 while (runeIter.next().copy_if_present(rune)) {
1758 auto seq = rune_to_utf16(rune);
1759 char16_t cur;
1760 auto seqIter = seq.iter();
1761 while (seqIter.next().copy_if_present(cur)) {
1762 if constexpr (std::endian::native != std::endian::little) {
1763 cur = flip_endian(cur);
1764 }
1765 if (auto writeRes = writer.write(cur); writeRes.is_error()) {
1766 return writeRes.error();
1767 }
1768 ++written;
1769 }
1770 }
1771 return success(written);
1772 }
1773
1783 template<WriterImpl WI>
1784 auto utf16_to_utf16le(io::Writer<WI> &writer, const Slice<const char16_t> &str, bool writeBom = false)
1786 return utf16_to_utf16le(writer, endian_from_native_slice(str), writeBom);
1787 }
1788
1798 template<WriterImpl WI>
1799 auto utf16_to_utf16be(io::Writer<WI> &writer, const SliceConstU16Native &str, bool writeBom = false)
1801 auto runeIter = utf16_rune_iterator(str);
1802 char32_t rune;
1803 size_t written = 0;
1804
1805 if (writeBom) {
1806 if (auto res = writer.write(endian_bom<std::endian::big, char16_t>); res.is_error()) {
1807 return res.error();
1808 }
1809 ++written;
1810 }
1811
1812 while (runeIter.next().copy_if_present(rune)) {
1813 auto seq = rune_to_utf16(rune);
1814 char16_t cur;
1815 auto seqIter = seq.iter();
1816 while (seqIter.next().copy_if_present(cur)) {
1817 if constexpr (std::endian::native != std::endian::big) {
1818 cur = flip_endian(cur);
1819 }
1820 if (auto writeRes = writer.write(cur); writeRes.is_error()) {
1821 return writeRes.error();
1822 }
1823 ++written;
1824 }
1825 }
1826 return success(written);
1827 }
1828
1838 template<WriterImpl WI>
1839 auto utf16_to_utf16be(io::Writer<WI> &writer, const Slice<const char16_t> &str, bool writeBom = false)
1841 return utf16_to_utf16be(writer, endian_from_native_slice(str), writeBom);
1842 }
1843
1853 template<WriterImpl WI>
1854 auto utf16le_to_utf16be(io::Writer<WI> &writer, const SliceConstU16Le &str, bool writeBom = false)
1856 auto runeIter = utf16_rune_iterator(str);
1857 char32_t rune;
1858 size_t written = 0;
1859
1860 if (writeBom) {
1861 if (auto res = writer.write(endian_bom<std::endian::big, char16_t>); res.is_error()) {
1862 return res.error();
1863 }
1864 ++written;
1865 }
1866
1867 while (runeIter.next().copy_if_present(rune)) {
1868 auto seq = rune_to_utf16(rune);
1869 char16_t cur;
1870 auto seqIter = seq.iter();
1871 while (seqIter.next().copy_if_present(cur)) {
1872 if constexpr (std::endian::native != std::endian::big) {
1873 cur = flip_endian(cur);
1874 }
1875 if (auto writeRes = writer.write(cur); writeRes.is_error()) {
1876 return writeRes.error();
1877 }
1878 ++written;
1879 }
1880 }
1881 return success(written);
1882 }
1883
1893 template<WriterImpl WI>
1894 auto utf16be_to_utf16le(io::Writer<WI> &writer, const SliceConstU16Be &str, bool writeBom = false)
1896 auto runeIter = utf16_rune_iterator(str);
1897 char32_t rune;
1898 size_t written = 0;
1899
1900 if (writeBom) {
1901 if (auto res = writer.write(endian_bom<std::endian::little, char16_t>); res.is_error()) {
1902 return res.error();
1903 }
1904 ++written;
1905 }
1906
1907 while (runeIter.next().copy_if_present(rune)) {
1908 auto seq = rune_to_utf16(rune);
1909 char16_t cur;
1910 auto seqIter = seq.iter();
1911 while (seqIter.next().copy_if_present(cur)) {
1912 if constexpr (std::endian::native != std::endian::little) {
1913 cur = flip_endian(cur);
1914 }
1915 if (auto writeRes = writer.write(cur); writeRes.is_error()) {
1916 return writeRes.error();
1917 }
1918 ++written;
1919 }
1920 }
1921 return success(written);
1922 }
1923
1933 template<WriterImpl WI>
1934 auto utf16le_to_utf16(io::Writer<WI> &writer, const SliceConstU16Le &str, bool writeBom = false)
1936 auto runeIter = utf16_rune_iterator(str);
1937 char32_t rune;
1938 size_t written = 0;
1939
1940 if (writeBom) {
1941 if (auto res = writer.write(endian_bom<std::endian::native, char16_t>); res.is_error()) {
1942 return res.error();
1943 }
1944 ++written;
1945 }
1946
1947 while (runeIter.next().copy_if_present(rune)) {
1948 auto seq = rune_to_utf16(rune);
1949 char16_t cur;
1950 auto seqIter = seq.iter();
1951 while (seqIter.next().copy_if_present(cur)) {
1952 if (auto writeRes = writer.write(cur); writeRes.is_error()) {
1953 return writeRes.error();
1954 }
1955 ++written;
1956 }
1957 }
1958 return success(written);
1959 }
1960
1970 template<WriterImpl WI>
1971 auto utf16be_to_utf16(io::Writer<WI> &writer, const SliceConstU16Be &str, bool writeBom = false)
1973 auto runeIter = utf16_rune_iterator(str);
1974 char32_t rune;
1975 size_t written = 0;
1976
1977 if (writeBom) {
1978 if (auto res = writer.write(endian_bom<std::endian::native, char16_t>); res.is_error()) {
1979 return res.error();
1980 }
1981 ++written;
1982 }
1983
1984 while (runeIter.next().copy_if_present(rune)) {
1985 auto seq = rune_to_utf16(rune);
1986 char16_t cur;
1987 auto seqIter = seq.iter();
1988 while (seqIter.next().copy_if_present(cur)) {
1989 if (auto writeRes = writer.write(cur); writeRes.is_error()) {
1990 return writeRes.error();
1991 }
1992 ++written;
1993 }
1994 }
1995 return success(written);
1996 }
1997
2007 template<WriterImpl WI>
2008 auto utf32_to_utf32le(io::Writer<WI> &writer, const SliceConstU32Native &str, bool writeBom = false)
2010 auto runeIter = utf32_rune_iterator(str);
2011 char32_t rune;
2012 size_t written = 0;
2013
2014 if (writeBom) {
2015 if (auto res = writer.write(endian_bom<std::endian::little, char32_t>); res.is_error()) {
2016 return res.error();
2017 }
2018 ++written;
2019 }
2020
2021 while (runeIter.next().copy_if_present(rune)) {
2022 if constexpr (std::endian::native != std::endian::little) {
2023 rune = flip_endian(rune);
2024 }
2025 if (auto writeRes = writer.write(rune); writeRes.is_error()) {
2026 return writeRes.error();
2027 }
2028 ++written;
2029 }
2030 return success(written);
2031 }
2032
2042 template<WriterImpl WI>
2043 auto utf32_to_utf32le(io::Writer<WI> &writer, const Slice<const char32_t> &str, bool writeBom = false)
2045 return utf32_to_utf32le(writer, endian_from_native_slice(str), writeBom);
2046 }
2047
2057 template<WriterImpl WI>
2058 auto utf32_to_utf32be(io::Writer<WI> &writer, const SliceConstU32Native &str, bool writeBom = false)
2060 auto runeIter = utf32_rune_iterator(str);
2061 char32_t rune;
2062 size_t written = 0;
2063
2064 if (writeBom) {
2065 if (auto res = writer.write(endian_bom<std::endian::big, char32_t>); res.is_error()) {
2066 return res.error();
2067 }
2068 ++written;
2069 }
2070
2071 while (runeIter.next().copy_if_present(rune)) {
2072 if constexpr (std::endian::native != std::endian::big) {
2073 rune = flip_endian(rune);
2074 }
2075 if (auto writeRes = writer.write(rune); writeRes.is_error()) {
2076 return writeRes.error();
2077 }
2078 ++written;
2079 }
2080 return success(written);
2081 }
2082
2092 template<WriterImpl WI>
2093 auto utf32_to_utf32be(io::Writer<WI> &writer, const Slice<const char32_t> &str, bool writeBom = false)
2095 return utf32_to_utf32be(writer, endian_from_native_slice(str), writeBom);
2096 }
2097
2107 template<WriterImpl WI>
2108 auto utf32le_to_utf32be(io::Writer<WI> &writer, const SliceConstU32Le &str, bool writeBom = false)
2110 auto runeIter = utf32_rune_iterator(str);
2111 char32_t rune;
2112 size_t written = 0;
2113
2114 if (writeBom) {
2115 if (auto res = writer.write(endian_bom<std::endian::big, char32_t>); res.is_error()) {
2116 return res.error();
2117 }
2118 ++written;
2119 }
2120
2121 while (runeIter.next().copy_if_present(rune)) {
2122 if constexpr (std::endian::native != std::endian::big) {
2123 rune = flip_endian(rune);
2124 }
2125 if (auto writeRes = writer.write(rune); writeRes.is_error()) {
2126 return writeRes.error();
2127 }
2128 ++written;
2129 }
2130 return success(written);
2131 }
2132
2142 template<WriterImpl WI>
2143 auto utf32be_to_utf32le(io::Writer<WI> &writer, const SliceConstU32Be &str, bool writeBom = false)
2145 auto runeIter = utf32_rune_iterator(str);
2146 char32_t rune;
2147 size_t written = 0;
2148
2149 if (writeBom) {
2150 if (auto res = writer.write(endian_bom<std::endian::little, char32_t>); res.is_error()) {
2151 return res.error();
2152 }
2153 ++written;
2154 }
2155
2156 while (runeIter.next().copy_if_present(rune)) {
2157 if constexpr (std::endian::native != std::endian::little) {
2158 rune = flip_endian(rune);
2159 }
2160 if (auto writeRes = writer.write(rune); writeRes.is_error()) {
2161 return writeRes.error();
2162 }
2163 ++written;
2164 }
2165 return success(written);
2166 }
2167
2177 template<WriterImpl WI>
2178 auto utf32le_to_utf32(io::Writer<WI> &writer, const SliceConstU32Le &str, bool writeBom = false)
2180 auto runeIter = utf32_rune_iterator(str);
2181 char32_t rune;
2182 size_t written = 0;
2183
2184 if (writeBom) {
2185 if (auto res = writer.write(endian_bom<std::endian::native, char32_t>); res.is_error()) {
2186 return res.error();
2187 }
2188 ++written;
2189 }
2190
2191 while (runeIter.next().copy_if_present(rune)) {
2192 if (auto writeRes = writer.write(rune); writeRes.is_error()) {
2193 return writeRes.error();
2194 }
2195 ++written;
2196 }
2197 return success(written);
2198 }
2199
2209 template<WriterImpl WI>
2210 auto utf32be_to_utf32(io::Writer<WI> &writer, const SliceConstU32Be &str, bool writeBom = false)
2212 auto runeIter = utf32_rune_iterator(str);
2213 char32_t rune;
2214 size_t written = 0;
2215
2216 if (writeBom) {
2217 if (auto res = writer.write(endian_bom<std::endian::native, char32_t>); res.is_error()) {
2218 return res.error();
2219 }
2220 ++written;
2221 }
2222
2223 while (runeIter.next().copy_if_present(rune)) {
2224 if (auto writeRes = writer.write(rune); writeRes.is_error()) {
2225 return writeRes.error();
2226 }
2227 ++written;
2228 }
2229 return success(written);
2230 }
2231
2232} // namespace mtcore
2233
2234#endif // MTSTD_ENCODINGS_HPP
ValIter< T > val(const T &r)
Generic value iterator that uses the operator[] and incrementing indexes to iterate over a collection...
Definition iter.hpp:114
constexpr auto nullopt
Placeholder value for an empty Optional.
Definition optional.hpp:409
ConstPtrIter< T > const_ptr(const T &r)
Generic constant pointer iterator that uses the operator[] and incrementing indexes to iterate over a...
Definition iter.hpp:128
PtrIter< T > ptr(T &r)
Generic pointer iterator that uses the operator[] and incrementing indexes to iterate over a collecti...
Definition iter.hpp:101
constexpr char32_t flip_endian_ch32(const char32_t ch)
Flips the endianness of a char32.
constexpr char16_t flip_endian_ch16(const char16_t ch)
Flips the endianness of a char16.
constexpr auto opposite
endian that is opposite of machine (opposite of std::endian::native)
constexpr auto flip_endian(const T bytes)
Flips the endian of a u16, char16, u32, or char32.
constexpr auto machine
endian of machine (alias for std::endian::native)
RuneConversionError
Errors that can occur when doing conversions to a rune.
SliceWriteError
Errors when writing to a slice.
#define ensure(check,...)
Ensures that a check holds true, aborts the program if not true Will print error if the condition is ...
Success< void > success()
Creates a successful void Result object.
Definition result.hpp:398
uint32_t u32
Alias for 32-bit unsigned ints.
constexpr auto is_bom(const char32_t ch)
Checks if a character is a BOM in native ordering.
EndianSlice< const char16_t, std::endian::big > SliceConstU16Be
Slice type representing const big endian UTF-16 string *.
auto utf32_to_utf32le(io::Writer< WI > &writer, const SliceConstU32Native &str, bool writeBom=false) -> Result< size_t, typename io::Writer< WI >::ErrType >
Converts UTF-32 Native Endian to UTF-32 Little Endian.
auto utf32le_to_utf32(io::Writer< WI > &writer, const SliceConstU32Le &str, bool writeBom=false) -> Result< size_t, typename io::Writer< WI >::ErrType >
Converts UTF-32 Little Endian to UTF-32 Native Endian.
EndianSlice< const char32_t, std::endian::little > SliceConstU32Le
Slice type representing const little endian UTF-32 string.
Slice< char16_t > slice_from_native_endian(SliceU16Native s)
Gets a normal slice from a native endian slice.
RuneIterator< char8_t > utf8_rune_iterator(const Slice< const char8_t > &s)
Rune iterator for a UTF8 sequence.
auto utf16_to_utf16le(io::Writer< WI > &writer, const SliceConstU16Native &str, bool writeBom=false) -> Result< size_t, typename io::Writer< WI >::ErrType >
Converts native UTF-16 to UTF-16 Little Endian.
Optional< std::endian > u16_detect_endian(const char16_t ch)
Tries to detect the endian encoding based on a possible BOM mark.
EndianSlice< const char32_t, std::endian::big > SliceConstU32Be
Slice type representing const big endian UTF-32 string.
auto utf32le_to_utf32be(io::Writer< WI > &writer, const SliceConstU32Le &str, bool writeBom=false) -> Result< size_t, typename io::Writer< WI >::ErrType >
Converts UTF-32 Little Endian to UTF-32 Big Endian.
auto utf32_to_utf32be(io::Writer< WI > &writer, const SliceConstU32Native &str, bool writeBom=false) -> Result< size_t, typename io::Writer< WI >::ErrType >
Converts UTF-32 Native Endian to UTF-32 Big Endian.
SliceU16Le endian_from_le_slice(Slice< char16_t > s)
Gets a UTF-16 endian slice from a slice which is assumed to be little endian.
Slice< char16_t > slice_from_le_endian(SliceU16Native s)
Gets a normal slice from a little endian slice.
auto utf32be_to_utf32(io::Writer< WI > &writer, const SliceConstU32Be &str, bool writeBom=false) -> Result< size_t, typename io::Writer< WI >::ErrType >
Converts UTF-32 Big Endian to UTF-32 Native Endian.
EndianSlice< char16_t, std::endian::native > SliceU16Native
Slice type representing native UTF-16 string *.
auto utf16_to_utf32(io::Writer< WI > &writer, const SliceConstU16Le &str, bool writeBom=false) -> Result< size_t, typename io::Writer< WI >::ErrType >
Will convert a UTF-16 string to UTF-32.
constexpr bool is_low_surrogate(char16_t ch)
Checks if UTF-16 character is a low surrogate *.
auto utf8_to_utf32(io::Writer< WI > &writer, const Slice< const char8_t > &str, bool writeBom=false) -> Result< size_t, typename io::Writer< WI >::ErrType >
Will convert a UTF-8 string to UTF-32.
EndianSlice< const char16_t, std::endian::native > SliceConstU16Native
Slice type representing const native UTF-16 string *.
Optional< std::endian > u32_detect_endian(const char32_t ch)
Tries to detect the endian encoding based on a possible BOM mark.
auto str_to_utf32(io::Writer< WI > &writer, const Slice< const char > &str, bool writeBom=false) -> Result< size_t, typename io::Writer< WI >::ErrType >
Will convert a UTF-8 string to UTF-32.
auto utf32_to_str(io::Writer< WI > &writer, const SliceConstU32Be &str) -> Result< size_t, typename io::Writer< WI >::ErrType >
Will convert a UTF-32 string to character sequence (UTF-8 encoded chars)
RuneIterator< char32_t > utf32_rune_iterator(const Slice< const char32_t > &s)
Rune iterator for a UTF32 sequence.
auto utf16be_to_utf16le(io::Writer< WI > &writer, const SliceConstU16Be &str, bool writeBom=false) -> Result< size_t, typename io::Writer< WI >::ErrType >
Converts UTF-16 Big Endian to UTF-16 Little Endian.
Slice< char16_t > slice_from_be_endian(SliceU16Native s)
Gets a normal slice from a big endian slice.
auto utf16be_to_utf16(io::Writer< WI > &writer, const SliceConstU16Be &str, bool writeBom=false) -> Result< size_t, typename io::Writer< WI >::ErrType >
Converts UTF-16 Big Endian to Native UTF-16.
constexpr char32_t bom
Native representation of Byte Order Mark (BOM)
auto utf16le_to_utf16be(io::Writer< WI > &writer, const SliceConstU16Le &str, bool writeBom=false) -> Result< size_t, typename io::Writer< WI >::ErrType >
Converts UTF-16 Little Endian to UTF-16 Big Endian.
auto utf32_to_utf8(io::Writer< WI > &writer, const SliceConstU32Be &str) -> Result< size_t, typename io::Writer< WI >::ErrType >
Will convert a UTF-32 string to UTF-8 string.
SliceU16Be endian_from_be_slice(Slice< char16_t > s)
Gets a UTF-16 endian slice from a slice which is assumed to be big endian.
EndianSlice< char16_t, std::endian::little > SliceU16Le
Slice type representing little endian UTF-16 string *.
auto utf16_to_str(io::Writer< WI > &writer, const SliceConstU16Be &str) -> Result< size_t, typename io::Writer< WI >::ErrType >
Will convert a UTF-16 string to UTF-8.
auto utf32be_to_utf32le(io::Writer< WI > &writer, const SliceConstU32Be &str, bool writeBom=false) -> Result< size_t, typename io::Writer< WI >::ErrType >
Converts UTF-32 Big Endian to UTF-32 Little Endian.
constexpr bool is_high_surrogate(char16_t ch)
Checks if UTF-16 character is a high surrogate *.
EndianSlice< char16_t, std::endian::big > SliceU16Be
Slice type representing big endian UTF-16 string *.
auto utf32_to_utf16(io::Writer< WI > &writer, const SliceConstU32Be &str, bool writeBom=false) -> Result< size_t, typename io::Writer< WI >::ErrType >
Will convert a UTF-32 string to a UTF-16 string. Can optionally write a BOM as a prefix.
constexpr auto is_bom_rev_16_bit(const char16_t ch)
Checks if a 16-bit character is a BOM in reverse ordering.
auto utf16_to_utf16be(io::Writer< WI > &writer, const SliceConstU16Native &str, bool writeBom=false) -> Result< size_t, typename io::Writer< WI >::ErrType >
Converts native UTF-16 to UTF-16 Big Endian.
auto utf8_to_str(io::Writer< WI > &writer, const Slice< const char8_t > &str) -> Result< size_t, typename io::Writer< WI >::ErrType >
Will convert a UTF-8 string to character sequence.
RuneIterator< char16_t > utf16_rune_iterator(const Slice< const char16_t > &s)
Rune iterator for a UTF16 sequence.
RuneIterator< char > str_rune_iterator(const Slice< const char > &s)
Rune iterator for a slice of characters.
U8Seq rune_to_utf8(Rune rune)
Converts a UTF-32 Rune to a utf-8 sequence.
EndianSlice< char32_t, std::endian::little > SliceU32Le
Slice type representing little endian UTF-32 string *.
constexpr auto is_bom_rev_32_bit(const char32_t ch)
Checks if a 32-bit character is a BOM in reverse ordering.
constexpr bool is_surrogate(char16_t ch)
Checks if UTF-16 character is a surrogate.
auto utf16le_to_utf16(io::Writer< WI > &writer, const SliceConstU16Le &str, bool writeBom=false) -> Result< size_t, typename io::Writer< WI >::ErrType >
Converts UTF-16 Little Endian to Native UTF-16.
EndianSlice< const char32_t, std::endian::native > SliceConstU32Native
Slice type representing const native UTF-32 string.
auto str_to_utf16(io::Writer< WI > &writer, const Slice< const char > &str, bool writeBom=false) -> Result< size_t, typename io::Writer< WI >::ErrType >
Will convert a UTF-8 string to UTF-16.
U16Seq rune_to_utf16(Rune rune)
Converts a UTF-32 Rune to a utf-16 sequence.
constexpr auto oppositeEndian
Returns the opposite endianness of a given endian (default is opposite of the native endian)
CharSeq rune_to_str(Rune rune)
Converts a UTF-32 Rune to a sequence of char.
EndianSlice< char32_t, std::endian::native > SliceU32Native
Slice type representing native UTF-32 string *.
EndianSlice< char32_t, std::endian::big > SliceU32Be
Slice type representing big endian UTF-32 string.
std::variant< SliceU16Le, SliceU16Be > endian_from_slice(Slice< char16_t > s)
Gets a UTF-16 endian slice from an input slice Will try to use the first character of the slice as a ...
auto utf16_to_utf8(io::Writer< WI > &writer, const SliceConstU16Be &str) -> Result< size_t, typename io::Writer< WI >::ErrType >
Will convert a UTF-16 string to UTF-8.
SliceU16Native endian_from_native_slice(Slice< char16_t > s)
Gets a UTF-16 endian slice from a slice which is assumed to be native endian.
auto utf8_to_utf16(io::Writer< WI > &writer, const Slice< const char8_t > &str, bool writeBom=false) -> Result< size_t, typename io::Writer< WI >::ErrType >
Will convert a UTF-8 string to UTF-16.
auto str_to_utf8(io::Writer< WI > &writer, const Slice< const char > &str) -> Result< size_t, typename io::Writer< WI >::ErrType >
Will convert a character sequence to UTF-8 string.
EndianSlice< const char16_t, std::endian::little > SliceConstU16Le
Slice type representing const little endian UTF-16 string *.
Generic iterator defaults built on common contracts Does not guarantee performance of iterators Actua...
Definition iter.hpp:91
Core library for C++ with Zig-related functionality.
constexpr T endian_bom
char32_t Rune
Result< Rune, RuneConversionError > utf8_to_rune(const Slice< const char8_t > &rune)
UTF-8 to a Rune.
impl::ShortSeq< char, 4 > CharSeq
Result< Rune, RuneConversionError > utf16_to_rune(const Slice< const char16_t > &rune)
UTF-16 to a Rune.
Result< Rune, RuneConversionError > str_to_rune(const Slice< const char > &rune)
Characters to a Rune.
impl::ShortSeq< char16_t, 2 > U16Seq
impl::ShortSeq< char8_t, 4 > U8Seq
Endian Wrapper for grabbing the endian value By default, will handle converting to and from the nativ...
constexpr T as_endian(std::endian viewAs) const
Gets the value with the specific endian.
Endian Wrapper for grabbing the endian value By default, will handle converting to and from the nativ...
constexpr EndianWrapper & operator=(T val)
Sets the pointed at value using the native endian.
constexpr EndianWrapper & with_endian(T newVal, std::endian setAs)
Sets the target value with the provided endian interpretation.
constexpr T as_endian(std::endian viewAs) const
Gets the value with the specific endian.
A Slice with Endian-aware data which is just a pointer + length + endian direction Accessing elements...
bool operator>=(const EndianSlice< std::add_const_t< T >, E > &o) const noexcept
bool operator>(const EndianSlice< std::remove_const_t< T >, E > &o) const noexcept
constexpr ConstEndianWrapper at(size_t i) const noexcept
Access element at a specific index Const operator, returns a const reference.
EndianWrapper operator[](size_t i) noexcept
Access element at a specific index Non-const operator, returns a mutable reference.
constexpr void init(T *head, size_t len)
Initializes a Slice Using init instead of a constructor so that slices allocated with malloc or an ar...
bool operator<=(const EndianSlice< std::remove_const_t< T >, E > &o) const noexcept
constexpr EndianSlice sub(size_t start) const noexcept
Gets a sub Slice from start to end.
bool operator<(const EndianSlice< std::remove_const_t< T >, E > &o) const noexcept
bool operator!=(const EndianSlice< std::add_const_t< T >, E > &o) const noexcept
bool operator>=(const EndianSlice< std::remove_const_t< T >, E > &o) const noexcept
EndianWrapper at(size_t i) noexcept
Access element at a specific index Non-const operator, returns a mutable reference.
constexpr Slice< std::add_const_t< T > > to_const() const noexcept
Converts to a const Slice.
constexpr bool empty() const noexcept
Checks if a Slice is empty.
decltype(auto) ptr_iter() const noexcept
decltype(auto) ptr_iter() noexcept
bool operator!=(const EndianSlice< std::remove_const_t< T >, E > &o) const noexcept
bool operator<=(const EndianSlice< std::add_const_t< T >, E > &o) const noexcept
bool operator==(const EndianSlice< std::remove_const_t< T >, E > &o) const noexcept
std::strong_ordering operator<=>(const EndianSlice< std::remove_const_t< T >, E > &other) const noexcept
Compares against another Slice.
decltype(auto) iter() const noexcept
constexpr ConstEndianWrapper operator[](size_t i) const noexcept
Access element at a specific index Const operator, returns a const reference.
bool operator>(const EndianSlice< std::add_const_t< T >, E > &o) const noexcept
constexpr EndianSlice sub(size_t start, size_t len) const noexcept
Gets a sub Slice from start up to a length.
constexpr size_t size() const noexcept
Gets the size of a Slice.
std::strong_ordering operator<=>(const EndianSlice< std::add_const_t< T >, E > &other) const noexcept
Compares against another Slice.
bool operator==(const EndianSlice< std::add_const_t< T >, E > &o) const noexcept
bool operator<(const EndianSlice< std::add_const_t< T >, E > &o) const noexcept
Represents a value that may or may not exist (an "Optional" value) Similar concept to std::optional,...
Definition optional.hpp:235
Represents a Result that may have an error (error code) or a success value A type of "void" means the...
Definition result.hpp:170
Optional< char32_t > next()
Slice< const char16_t > data
Slice< const char32_t > data
Optional< char32_t > next()
Iterates over runes of UTF-8 sequences (char, char8_t)
Slice< const T > data
Optional< Rune > next()
Gets the next Rune, if present.
A Slice which is just a pointer + length Accessing elements through the array operator will do bounds...
constexpr Slice sub(size_t start) const noexcept
Gets a sub Slice from start.
constexpr size_t size() const noexcept
Gets the size of a Slice.
A writer that writes data to some sort of stream or buffer Note: the data elements written should be ...
Definition io/writer.hpp:51