MT Core (C++)
Core library for replacing the C++ standard library in project usage
Loading...
Searching...
No Matches
ascii.hpp
Go to the documentation of this file.
1/*
2
3Copyright 2025 Matthew Tolman
4
5Licensed under the Apache License, Version 2.0 (the "License");
6you may not use this file except in compliance with the License.
7You may obtain a copy of the License at
8
9 http://www.apache.org/licenses/LICENSE-2.0
10
11Unless required by applicable law or agreed to in writing, software
12distributed under the License is distributed on an "AS IS" BASIS,
13WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14See the License for the specific language governing permissions and
15limitations under the License.
16
17*/
18
23
24#ifndef MTCORE_ASCII_HPP
25#define MTCORE_ASCII_HPP
26
29#include "mtcore/core.hpp"
30#include <array>
31#include <limits>
32
33namespace mtcore {
39 namespace ascii {
45
/**
 * Checks if a character is an ASCII digit (0-9).
 *
 * Declared constexpr and noexcept so it can also be used in constant
 * expressions (static_assert, compile-time tables); runtime behaviour is
 * unchanged.
 *
 * @param c character to classify
 * @return true when c is in the range '0'..'9', false otherwise
 */
constexpr bool is_digit(const char c) noexcept {
    return c >= '0' && c <= '9';
}
52
/**
 * Checks if a character is an ASCII hex digit (0-9, a-f, A-F).
 *
 * Declared constexpr and noexcept so it can also be used in constant
 * expressions; the decimal-digit range test is written inline so this
 * function is self-contained in a constant-evaluated context.
 *
 * @param c character to classify
 * @return true when c is a decimal digit or a letter a-f in either case
 */
constexpr bool is_hex(const char c) noexcept {
    if (c >= '0' && c <= '9') {
        return true;
    }
    // Setting bit 5 (0x20) maps 'A'-'F' onto 'a'-'f' and leaves 'a'-'f'
    // untouched, so a single range check covers both cases.
    const int folded = c | 0x20;
    return folded >= 'a' && folded <= 'f';
}
65
72 inline bool is_hex_str(Slice<const char> str) {
73 if (str.empty()) {
74 return false;
75 }
76
77 auto iter = str.iter();
78 char cur;
79 while (iter.next().copy_if_present(cur)) {
80 if (!is_hex(cur)) {
81 return false;
82 }
83 }
84 return true;
85 }
86
93 inline bool is_int_str(Slice<const char> str) {
94 if (str.empty()) {
95 return false;
96 }
97
98 auto iter = str.iter();
99 char cur;
100 bool first = true;
101 bool digit = false;
102 while (iter.next().copy_if_present(cur)) {
103 if (first) {
104 first = false;
105 if (cur == '+' || cur == '-') {
106 continue;
107 }
108 }
109 if (!is_digit(cur)) {
110 return false;
111 }
112 digit = true;
113 }
114 return digit;
115 }
116
// Body of is_pos_int_str(Slice<const char> str). The signature line (listing line 123)
// is not visible in this extract.
//
// Returns true only when str is an optional single leading '+' followed by one or more
// ASCII digits. Empty input, a lone "+", a leading '-', or any other non-digit character
// yields false.
124 if (str.empty()) {
125 return false;
126 }
127
128 auto iter = str.iter();
129 char cur;
130 bool first = true;
131 bool digit = false;
132 while (iter.next().copy_if_present(cur)) {
// Only the very first character may be a '+' sign; '-' is not accepted here and falls
// through to the digit check below, which rejects it.
133 if (first) {
134 first = false;
135 if (cur == '+') {
136 continue;
137 }
138 }
139 if (!is_digit(cur)) {
140 return false;
141 }
142 digit = true;
143 }
// `digit` is still false when the input was just "+", so a sign alone is rejected.
144 return digit;
145 }
146
// hex_to_int<Out>(const Slice<const char> &parse) -> Result<Out, AsciiNumericParseError>
// (signature taken from this listing's index; listing line 156 is not visible here).
//
// Parses an optionally signed ASCII hex string into an integer of type Out (default i32).
// Accepted shape, as far as the visible code shows: optional '+'/'-', optional "0x" or "x"
// prefix (lowercase x only), then hex digits; ',' characters between digits are skipped.
//
// NOTE(review): the bodies of every rejecting branch (listing lines 159, 173, 177, 194, 201,
// 206, 215, 221, 226, 229) are missing from this extract. Presumably each returns an
// AsciiNumericParseError -- confirm against the real header before relying on this.
// NOTE(review): unlike base10_to_int, the negate path below is not wrapped in
// `if constexpr (std::is_signed_v<Out>)` -- confirm this instantiates cleanly for unsigned Out.
155 template<typename Out = i32>
157 Out res = 0;
// Null pointer or zero length input takes this branch (body not visible).
158 if (!parse.head || !parse.len) {
160 }
161
// Consume at most one leading sign character.
162 bool negate = false;
163 auto s = parse;
164 if (s[0] == '-') {
165 negate = true;
166 s = s.sub(1);
167 }
168 else if (s[0] == '+') {
169 s = s.sub(1);
170 }
171
// A '-' sign combined with an unsigned Out takes this branch (body not visible).
172 if (negate && !std::numeric_limits<Out>::is_signed) {
174 }
175
// Nothing left after the sign (body not visible).
176 if (s.size() == 0) {
178 }
179
// Strip an optional "0x" or bare "x" prefix. The size checks require at least one
// character to remain after the prefix, otherwise the prefix is left in place.
180 if (s.size() > 2 && s[0] == '0' && s[1] == 'x') {
181 s = s.sub(2);
182 }
183 else if (s.size() > 1 && s[0] == 'x') {
184 s = s.sub(1);
185 }
186
187 char ch;
188 auto it = s.iter();
189 while (it.next().copy_if_present(ch)) {
// Commas are ignored wherever they appear in the digit sequence.
190 if (ch == ',') {
191 continue;
192 }
// Any other non-hex character takes this branch (body not visible).
193 if (!is_hex(ch)) {
195 }
196
// Keep the previous value for the after-the-fact comparison at the end of the loop body.
197 auto old = res;
198 // Try to detect overflows before they happen (it's undefined behavior if they happen)
199 if (negate) {
200 if (res < std::numeric_limits<Out>::min() / 16) {
202 }
203 }
204 else {
205 if (res > std::numeric_limits<Out>::max() / 16) {
207 }
208 }
209 res *= 16;
210
// Digit value: '0'-'9' map to 0-9; for letters, OR-ing in bit 5 lower-cases the character
// so both 'a'-'f' and 'A'-'F' map to 10-15.
211 auto addAmt = (is_digit(ch)) ? ch - '0' : (ch | 0b0100000) - 'a' + 10;
212 // Try to detect overflows before they happen (it's undefined behavior if they happen)
213 if (negate) {
// Compares the remaining distance between res and min() against the digit about to be
// subtracted. The res != 0 guard presumably avoids std::abs(min()) -- confirm.
214 if (res != 0 && std::abs(std::numeric_limits<Out>::min() - res) < addAmt) {
216 }
// Negative numbers are accumulated by subtracting each digit.
217 addAmt = -addAmt;
218 }
219 // Try to detect overflows before they happen (it's undefined behavior if they happen)
220 else if (std::numeric_limits<Out>::max() - res < addAmt) {
222 }
223 res += addAmt;
224
// Final check: the value must not have moved toward zero after adding a digit.
225 if (!negate && old > res) {
227 }
228 else if (negate && old < res) {
230 }
231 }
232 return res;
233 }
234
// base10_to_int<Out>(const Slice<const char> &parse) -> Result<Out, AsciiNumericParseError>
// (signature taken from this listing's index; listing line 244 is not visible here).
//
// Parses an optionally signed ASCII decimal string into an integer of type Out (default i32).
// Accepted shape, as far as the visible code shows: optional '+'/'-' followed by decimal
// digits; ',' characters between digits are skipped.
//
// NOTE(review): the bodies of every rejecting branch (listing lines 247, 261, 265, 275, 282,
// 287, 297, 307, 312, 315) are missing from this extract. Presumably each returns an
// AsciiNumericParseError -- confirm against the real header before relying on this.
243 template<typename Out = i32>
245 Out res = 0;
// Null pointer or zero length input takes this branch (body not visible).
246 if (!parse.head || !parse.len) {
248 }
249
// Consume at most one leading sign character.
250 bool negate = false;
251 auto s = parse;
252 if (s[0] == '-') {
253 negate = true;
254 s = s.sub(1);
255 }
256 else if (s[0] == '+') {
257 s = s.sub(1);
258 }
259
// A '-' sign combined with an unsigned Out takes this branch (body not visible).
260 if (negate && !std::numeric_limits<Out>::is_signed) {
262 }
263
// Nothing left after the sign (body not visible).
264 if (s.size() == 0) {
266 }
267
268 char ch;
269 auto it = s.iter();
270 while (it.next().copy_if_present(ch)) {
// Commas are ignored wherever they appear in the digit sequence.
271 if (ch == ',') {
272 continue;
273 }
// Any other non-digit character takes this branch (body not visible).
274 if (!is_digit(ch)) {
276 }
277
// Keep the previous value for the after-the-fact comparison at the end of the loop body.
278 auto old = res;
279 // Try to detect overflows before they happen (it's undefined behavior if they happen)
280 if (negate) {
281 if (res < std::numeric_limits<Out>::min() / 10) {
283 }
284 }
285 else {
286 if (res > std::numeric_limits<Out>::max() / 10) {
288 }
289 }
290 res *= 10;
291
292 auto addAmt = static_cast<Out>(ch - '0');
293 if (negate) {
// The signed-only arithmetic is compiled only when Out is a signed type.
294 if constexpr (std::is_signed_v<Out>) {
295 // Try to detect overflows before they happen (it's undefined behavior if they happen)
296 if (res != 0 && std::abs(std::numeric_limits<Out>::min() - res) < addAmt) {
298 }
// Negative numbers are accumulated by subtracting each digit.
299 addAmt = -addAmt;
300 }
301 else {
// negate with an unsigned Out was already caught by the check at listing line 260,
// so (presumably, since that branch body is not visible) this path cannot be reached.
302 unreachable();
303 }
304 }
305 // Try to detect overflows before they happen (it's undefined behavior if they happen)
306 else if (std::numeric_limits<Out>::max() - res < addAmt) {
308 }
309 res += addAmt;
310
// Final check: the value must not have moved toward zero after adding a digit.
311 if (!negate && old > res) {
313 }
314 else if (negate && old < res) {
316 }
317 }
318 return res;
319 }
320
// Members of UnescapeError, the error reported when unescaping characters fails.
// The opening line of the type (listing line 325) and its static constants EMPTY_INPUT and
// INVALID_ESCAPE_SEQUENCE (listing lines 331-332) are not visible in this extract.
//
// Numeric error code; two UnescapeError values compare equal exactly when their codes match.
326 int code;
327 bool operator==(const UnescapeError &o) const { return code == o.code; }
328
329 bool operator!=(const UnescapeError &o) const { return code != o.code; }
330
333 };
334
// Unescapes the first (potentially) escaped character in a character sequence.
//
// Visible behaviour:
//  - If str does not begin with escapeChar, str[0] is returned unchanged.
//  - escapeChar followed by one of 0 ? a b e f n r t v yields the matching character
//    ('e' yields 0x1B).
//  - escapeChar followed by 'x' and two hex digits is parsed through hex_to_int<char>.
//  - escapeChar followed by any other character yields that character itself.
//
// @param str        input characters; only the leading escape sequence is examined
// @param escapeChar character that introduces an escape sequence (default backslash)
// @return the unescaped character, or an UnescapeError
//
// NOTE(review): the bodies of the failing branches (listing lines 345, 350, 402, 406, 410)
// are missing from this extract. Presumably they return UnescapeError::EMPTY_INPUT or
// UnescapeError::INVALID_ESCAPE_SEQUENCE -- confirm against the real header.
// NOTE(review): the hex escape is parsed into `char`; where char is signed, two-digit values
// above numeric_limits<char>::max() (e.g. "ff") would presumably trip hex_to_int's overflow
// check -- confirm this is intended.
343 inline Result<char, UnescapeError> unescape_char(Slice<const char> str, char escapeChar = '\\') {
// Empty input (body not visible).
344 if (str.empty()) {
346 }
347
348 if (str.size() == 1) {
// A lone escape character with nothing after it (body not visible).
349 if (str[0] == escapeChar) {
351 }
352 return str[0];
353 }
354
// Not an escape sequence: the first character stands for itself.
355 if (str[0] != escapeChar) {
356 return str[0];
357 }
358
359 char escaped = str[1];
360 switch (str[1]) {
361 case '0':
362 escaped = '\0';
363 break;
364 case '?':
365 escaped = '\?';
366 break;
367 case 'a':
368 escaped = '\a';
369 break;
370 case 'b':
371 escaped = '\b';
372 break;
373 case 'e':
374 escaped = '\x1B';
375 break;
376 case 'f':
377 escaped = '\f';
378 break;
379 case 'n':
380 escaped = '\n';
381 break;
382 case 'r':
383 escaped = '\r';
384 break;
385 case 't':
386 escaped = '\t';
387 break;
388 case 'v':
389 escaped = '\v';
390 break;
391 case 'x': {
// Collect the two characters after 'x' into a fixed buffer; both must be hex digits
// for the parse below to be attempted.
392 auto hex = std::array<char, 2>{'0', '0'};
393 if (2 < str.size() && is_hex(str[2])) {
394 hex[0] = str[2];
395 if (3 < str.size() && is_hex(str[3])) {
396 hex[1] = str[3];
397 auto parseRes = hex_to_int<char>(slice_from(hex));
398 if (parseRes.is_success()) {
399 escaped = parseRes.value();
400 }
401 else {
403 }
404 }
405 else {
407 }
408 }
409 else {
411 }
412 break;
413 }
// Unrecognised escape codes fall back to the escaped character itself.
414 default:
415 escaped = str[1];
416 }
417 return escaped;
418 }
419
// Writes the unescaped form of str to writer, one character per writer.write call.
// Declared return type (from this listing's index; listing line 446 is not visible here):
//   Result<size_t, std::variant<typename Writer::ErrType, UnescapeError>>
//
// Characters other than escapeChar are written as-is. After escapeChar, the codes
// 0 ? a b e f n r t v are translated to the matching character ('e' yields 0x1B), 'x' followed
// by two hex digits is parsed through hex_to_int<char>, and any other character is written
// as itself.
//
// @param writer     destination; its write() result is checked after every character
// @param str        input characters to unescape
// @param escapeChar character that introduces an escape sequence (default backslash)
// @return on success, the number of characters written
//
// NOTE(review): the bodies of the failing branches (listing lines 500, 504, 508, 523) are
// missing from this extract. Presumably they return an UnescapeError -- confirm against the
// real header.
445 template<typename Writer>
447 write_unescaped(Writer &writer, Slice<const char> str, char escapeChar = '\\') {
448 using ErrType = std::variant<typename Writer::ErrType, UnescapeError>;
449 size_t written = 0;
450 for (size_t i = 0; i < str.size(); ++i) {
451 auto cur = str[i];
452 if (cur == escapeChar) {
453 if (i + 1 < str.size()) {
454 // we're going to consume at least one additional character
// mtdefer runs its body when the enclosing scope is left, so i is advanced past the
// escape code only after this block has finished reading str[i + 1..].
455 mtdefer { i += 1; };
456 char escaped = str[i + 1];
457 switch (str[i + 1]) {
458 case '0':
459 escaped = '\0';
460 break;
461 case '?':
462 escaped = '\?';
463 break;
464 case 'a':
465 escaped = '\a';
466 break;
467 case 'b':
468 escaped = '\b';
469 break;
470 case 'e':
471 escaped = '\x1B';
472 break;
473 case 'f':
474 escaped = '\f';
475 break;
476 case 'n':
477 escaped = '\n';
478 break;
479 case 'r':
480 escaped = '\r';
481 break;
482 case 't':
483 escaped = '\t';
484 break;
485 case 'v':
486 escaped = '\v';
487 break;
488 case 'x': {
// Collect the two characters after 'x' into a fixed buffer; both must be hex digits
// for the parse below to be attempted.
489 auto hex = std::array{'0', '0'};
490 if (i + 2 < str.size() && is_hex(str[i + 2])) {
491 hex[0] = str[i + 2];
492 if (i + 3 < str.size() && is_hex(str[i + 3])) {
493 hex[1] = str[i + 3];
494 auto parseRes = hex_to_int<char>(slice_from(hex));
495 if (parseRes.is_success()) {
// Skip the two hex digits as well; applied when this success branch is left.
496 mtdefer { i += 2; };
497 escaped = parseRes.value();
498 }
499 else {
501 }
502 }
503 else {
505 }
506 }
507 else {
509 }
510 break;
511 }
// Unrecognised escape codes fall back to the escaped character itself.
512 default:
513 escaped = str[i + 1];
514 }
515
// Propagate a writer failure through the combined error type.
516 auto chRes = writer.write(escaped);
517 if (chRes.is_error()) {
518 return error(ErrType{chRes.error().code});
519 }
520 ++written;
521 }
// escapeChar is the last character of the input (body not visible).
522 else {
524 }
525 }
526 else {
// Ordinary character: write it through unchanged.
527 auto chRes = writer.write(cur);
528 if (chRes.is_error()) {
529 return error(ErrType{chRes.error().code});
530 }
531 ++written;
532 }
533 }
534 return written;
535 }
536 } // namespace ascii
537} // namespace mtcore
538#endif // MTCORE_ASCII_HPP
bool is_hex_str(Slice< const char > str)
Checks if a string is composed of only hex characters. Will return false if string is empty.
Definition ascii.hpp:72
bool is_digit(const char c)
Checks if a character is an ASCII digit (0-9)
Definition ascii.hpp:51
bool is_int_str(Slice< const char > str)
Checks if a string is composed of only integer characters. Will return false if string is empty.
Definition ascii.hpp:93
bool is_hex(const char c)
Checks if a character is an ASCII hex digit (0-9, a-f, A-F)
Definition ascii.hpp:58
Result< Out, AsciiNumericParseError > base10_to_int(const Slice< const char > &parse)
Tries to parse an ASCII numeric string into an integer Will return an error if the parsed string does...
Definition ascii.hpp:244
bool is_pos_int_str(Slice< const char > str)
Checks if a string is composed of only positive integer characters Will return false if string is emp...
Definition ascii.hpp:123
Result< Out, AsciiNumericParseError > hex_to_int(const Slice< const char > &parse)
Tries to parse an ASCII hex string into an integer Will return an error if the parsed string does not...
Definition ascii.hpp:156
Result< size_t, std::variant< typename Writer::ErrType, UnescapeError > > write_unescaped(Writer &writer, Slice< const char > str, char escapeChar='\\')
Writes unescaped ASCII characters to a stream Note: This does NOT support writing Unicode escape code...
Definition ascii.hpp:447
Result< char, UnescapeError > unescape_char(Slice< const char > str, char escapeChar='\\')
Unescapes the first (potentially) escaped character in a character sequence If the sequence is empty,...
Definition ascii.hpp:343
constexpr Slice< const char32_t > slice_from(char32_t *cstr)
Creates a slice from a utf32 string in the form of a c string.
AsciiNumericParseError
Errors when parsing ASCII strings as numbers.
Definition ascii.hpp:44
#define unreachable(...)
Marks code as unreachable.
#define mtdefer
Defer statement that will defer execution until the scope is left, at which point the code will run...
Error< Underlying > error(Underlying err)
Creates an error.
Definition result.hpp:425
ASCII-related methods for parsing and character classification Split into its own namespace to allow ...
Definition ascii.hpp:39
Generic iterator defaults built on common contracts Does not guarantee performance of iterators Actua...
Definition iter.hpp:91
Core library for C++ with Zig-related functionality.
Represents a Result that may have an error (error code) or a success value A type of "void" means the...
Definition result.hpp:170
A Slice which is just a pointer + length Accessing elements through the array operator will do bounds...
constexpr size_t size() const noexcept
Gets the size of a Slice.
constexpr bool empty() const noexcept
Checks if a Slice is empty.
decltype(auto) iter() const noexcept
Error when unescaping characters fails.
Definition ascii.hpp:325
bool operator!=(const UnescapeError &o) const
Definition ascii.hpp:329
static const UnescapeError INVALID_ESCAPE_SEQUENCE
Definition ascii.hpp:332
static const UnescapeError EMPTY_INPUT
Definition ascii.hpp:331
bool operator==(const UnescapeError &o) const
Definition ascii.hpp:327