Skip to content

Commit d43bc22

Browse files
lheckerDHowett
authored andcommitted
Fix til string to integer routines (microsoft#18276)
(cherry picked from commit 89bc36c) Service-Card-Id: PVTI_lADOAF3p4s4AmhmQzgZtkWk Service-Version: 1.22
1 parent fa65a29 commit d43bc22

File tree

2 files changed

+108
-44
lines changed

2 files changed

+108
-44
lines changed

src/inc/til/string.h

Lines changed: 39 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -361,61 +361,71 @@ namespace til // Terminal Implementation Library. Also: "Today I Learned"
361361
_TIL_INLINEPREFIX constexpr std::optional<uint64_t> parse_u64(const std::basic_string_view<T, Traits>& str, int base = 0) noexcept
362362
{
363363
// We don't have to test ptr for nullability, as we only access it under either condition:
364-
// * str.length() > 0, for determining the base
364+
// * str.size() > 0, for determining the base
365365
// * ptr != end, when parsing the characters; if ptr is null, length will be 0 and thus end == ptr
366366
#pragma warning(push)
367367
#pragma warning(disable : 26429) // Symbol 'ptr' is never tested for nullness, it can be marked as not_null
368368
#pragma warning(disable : 26451) // Arithmetic overflow: Using operator '+' on a 4 byte value and then casting the result to a 8 byte value. [...]
369369
#pragma warning(disable : 26481) // Don't use pointer arithmetic. Use span instead
370370
auto ptr = str.data();
371-
const auto end = ptr + str.length();
371+
const auto end = ptr + str.size();
372372
uint64_t accumulator = 0;
373-
uint64_t base64 = base;
373+
uint64_t base_uint64 = base;
374374

375375
if (base <= 0)
376376
{
377-
base64 = 10;
377+
base_uint64 = 10;
378378

379-
if (ptr != end && *ptr == '0')
379+
if (str.size() >= 2 && *ptr == '0')
380380
{
381-
base64 = 8;
381+
base_uint64 = 8;
382382
ptr += 1;
383383

384-
if (ptr != end && (*ptr == 'x' || *ptr == 'X'))
384+
// Shift to lowercase to make the comparison easier.
385+
const auto ch = *ptr | 0x20;
386+
387+
if (ch == 'b')
388+
{
389+
base_uint64 = 2;
390+
ptr += 1;
391+
}
392+
else if (ch == 'x')
385393
{
386-
base64 = 16;
394+
base_uint64 = 16;
387395
ptr += 1;
388396
}
389397
}
390398
}
391399

392-
if (ptr == end)
400+
if (ptr == end || base_uint64 > 36)
393401
{
394402
return {};
395403
}
396404

405+
const auto max_before_mul = UINT64_MAX / base_uint64;
406+
397407
for (;;)
398408
{
399-
uint64_t value = 0;
400-
if (*ptr >= '0' && *ptr <= '9')
401-
{
402-
value = *ptr - '0';
403-
}
404-
else if (*ptr >= 'A' && *ptr <= 'F')
405-
{
406-
value = *ptr - 'A' + 10;
407-
}
408-
else if (*ptr >= 'a' && *ptr <= 'f')
409-
{
410-
value = *ptr - 'a' + 10;
411-
}
412-
else
413-
{
414-
return {};
415-
}
416-
417-
const auto acc = accumulator * base64 + value;
418-
if (acc < accumulator)
409+
// Magic mapping from 0-9, A-Z, a-z to 0-35 go brrr. Invalid values are >35.
410+
const uint64_t ch = *ptr;
411+
const uint64_t sub = ch >= '0' && ch <= '9' ? (('0' - 1) & ~0x20) : (('A' - 1) & ~0x20) - 10;
412+
// 'A' and 'a' reside at 0b...00001. By subtracting 1 we shift them to 0b...00000.
413+
// We can then mask off 0b..1..... (= 0x20) to map a-z to A-Z.
414+
// Once we subtract `sub`, all characters between Z and a will underflow.
415+
// This results in A-Z and a-z mapping to 10-35.
416+
const uint64_t value = ((ch - 1) & ~0x20) - sub;
417+
418+
// This is where we'd be using __builtin_mul_overflow and __builtin_add_overflow,
419+
// but when MSVC finally added support for it in v17.7, it had a different name,
420+
// only worked on x86, and only for signed integers. So, we can't use it.
421+
const auto acc = accumulator * base_uint64 + value;
422+
if (
423+
// Check for invalid inputs.
424+
value >= base_uint64 ||
425+
// Check for multiplication overflow.
426+
accumulator > max_before_mul ||
427+
// Check for addition overflow.
428+
acc < accumulator)
419429
{
420430
return {};
421431
}

src/til/ut_til/string.cpp

Lines changed: 69 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,12 @@ class StringTests
7979
// clang++ -fsanitize=address,undefined,fuzzer -std=c++17 file.cpp
8080
// and was run for 20min across 16 jobs in parallel.
8181
#if 0
82+
template<typename T, typename Traits>
83+
std::optional<uint64_t> parse_u64(const std::basic_string_view<T, Traits>& str, int base = 0) noexcept
84+
{
85+
// ... implementation ...
86+
}
87+
8288
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size)
8389
{
8490
while (size > 0 && (isspace(*data) || *data == '+' || *data == '-'))
@@ -93,27 +99,17 @@ class StringTests
9399
}
94100

95101
char narrow_buffer[128];
96-
wchar_t wide_buffer[128];
97-
98102
memcpy(narrow_buffer, data, size);
99-
for (size_t i = 0; i < size; ++i)
100-
{
101-
wide_buffer[i] = data[i];
102-
}
103-
104103
// strtoul requires a null terminator
105104
narrow_buffer[size] = 0;
106-
wide_buffer[size] = 0;
107105

108106
char* end;
109-
const auto expected = strtoul(narrow_buffer, &end, 0);
110-
if (end != narrow_buffer + size || expected >= ULONG_MAX / 16)
111-
{
112-
return 0;
113-
}
107+
const auto val = strtoull(narrow_buffer, &end, 0);
108+
const auto bad = end != narrow_buffer + size || val == ULLONG_MAX;
109+
const auto expected = bad ? std::nullopt : std::optional{ val };
114110

115-
const auto actual = parse_u64({ wide_buffer, size });
116-
if (expected != actual)
111+
const auto actual = parse_u64(std::string_view{ narrow_buffer, size });
112+
if (expected != actual && actual.value_or(0) != ULLONG_MAX)
117113
{
118114
__builtin_trap();
119115
}
@@ -122,6 +118,64 @@ class StringTests
122118
}
123119
#endif
124120

121+
TEST_METHOD(parse_u64_overflow)
122+
{
123+
VERIFY_ARE_EQUAL(UINT64_C(18446744073709551614), til::details::parse_u64(std::string_view{ "18446744073709551614" }));
124+
VERIFY_ARE_EQUAL(UINT64_C(18446744073709551615), til::details::parse_u64(std::string_view{ "18446744073709551615" }));
125+
VERIFY_ARE_EQUAL(std::nullopt, til::details::parse_u64(std::string_view{ "18446744073709551616" }));
126+
VERIFY_ARE_EQUAL(std::nullopt, til::details::parse_u64(std::string_view{ "18446744073709551617" }));
127+
VERIFY_ARE_EQUAL(std::nullopt, til::details::parse_u64(std::string_view{ "88888888888888888888" }));
128+
}
129+
130+
TEST_METHOD(parse_unsigned)
131+
{
132+
VERIFY_ARE_EQUAL(std::nullopt, til::parse_unsigned<uint32_t>(""));
133+
VERIFY_ARE_EQUAL(std::nullopt, til::parse_unsigned<uint32_t>("0x"));
134+
VERIFY_ARE_EQUAL(std::nullopt, til::parse_unsigned<uint32_t>("Z"));
135+
VERIFY_ARE_EQUAL(std::nullopt, til::parse_unsigned<uint32_t>("0xZ"));
136+
VERIFY_ARE_EQUAL(std::nullopt, til::parse_unsigned<uint32_t>("0Z"));
137+
VERIFY_ARE_EQUAL(std::nullopt, til::parse_unsigned<uint32_t>("123abc"));
138+
VERIFY_ARE_EQUAL(std::nullopt, til::parse_unsigned<uint32_t>("0123abc"));
139+
VERIFY_ARE_EQUAL(std::nullopt, til::parse_unsigned<uint32_t>("0x100000000"));
140+
VERIFY_ARE_EQUAL(0u, til::parse_unsigned<uint32_t>("0"));
141+
VERIFY_ARE_EQUAL(0u, til::parse_unsigned<uint32_t>("0x0"));
142+
VERIFY_ARE_EQUAL(0123u, til::parse_unsigned<uint32_t>("0123"));
143+
VERIFY_ARE_EQUAL(123u, til::parse_unsigned<uint32_t>("123"));
144+
VERIFY_ARE_EQUAL(0x123u, til::parse_unsigned<uint32_t>("0x123"));
145+
VERIFY_ARE_EQUAL(0x123abcu, til::parse_unsigned<uint32_t>("0x123abc"));
146+
VERIFY_ARE_EQUAL(0X123ABCu, til::parse_unsigned<uint32_t>("0X123ABC"));
147+
VERIFY_ARE_EQUAL(UINT32_MAX, til::parse_unsigned<uint32_t>("0xffffffff"));
148+
VERIFY_ARE_EQUAL(UINT32_MAX, til::parse_unsigned<uint32_t>("4294967295"));
149+
}
150+
151+
TEST_METHOD(parse_signed)
152+
{
153+
VERIFY_ARE_EQUAL(std::nullopt, til::parse_signed<int32_t>(""));
154+
VERIFY_ARE_EQUAL(std::nullopt, til::parse_signed<int32_t>("-"));
155+
VERIFY_ARE_EQUAL(std::nullopt, til::parse_signed<int32_t>("--"));
156+
VERIFY_ARE_EQUAL(std::nullopt, til::parse_signed<int32_t>("--0"));
157+
VERIFY_ARE_EQUAL(std::nullopt, til::parse_signed<int32_t>("-0Z"));
158+
VERIFY_ARE_EQUAL(std::nullopt, til::parse_signed<int32_t>("-123abc"));
159+
VERIFY_ARE_EQUAL(std::nullopt, til::parse_signed<int32_t>("-0123abc"));
160+
VERIFY_ARE_EQUAL(std::nullopt, til::parse_signed<int32_t>("0x80000000"));
161+
VERIFY_ARE_EQUAL(std::nullopt, til::parse_signed<int32_t>("-0x80000001"));
162+
VERIFY_ARE_EQUAL(0, til::parse_signed<int32_t>("0"));
163+
VERIFY_ARE_EQUAL(0, til::parse_signed<int32_t>("-0"));
164+
VERIFY_ARE_EQUAL(0, til::parse_signed<int32_t>("-0x0"));
165+
VERIFY_ARE_EQUAL(0123, til::parse_signed<int32_t>("0123"));
166+
VERIFY_ARE_EQUAL(123, til::parse_signed<int32_t>("123"));
167+
VERIFY_ARE_EQUAL(0x123, til::parse_signed<int32_t>("0x123"));
168+
VERIFY_ARE_EQUAL(-0123, til::parse_signed<int32_t>("-0123"));
169+
VERIFY_ARE_EQUAL(-123, til::parse_signed<int32_t>("-123"));
170+
VERIFY_ARE_EQUAL(-0x123, til::parse_signed<int32_t>("-0x123"));
171+
VERIFY_ARE_EQUAL(-0x123abc, til::parse_signed<int32_t>("-0x123abc"));
172+
VERIFY_ARE_EQUAL(-0X123ABC, til::parse_signed<int32_t>("-0X123ABC"));
173+
VERIFY_ARE_EQUAL(INT32_MIN, til::parse_signed<int32_t>("-0x80000000"));
174+
VERIFY_ARE_EQUAL(INT32_MIN, til::parse_signed<int32_t>("-2147483648"));
175+
VERIFY_ARE_EQUAL(INT32_MAX, til::parse_signed<int32_t>("0x7fffffff"));
176+
VERIFY_ARE_EQUAL(INT32_MAX, til::parse_signed<int32_t>("2147483647"));
177+
}
178+
125179
TEST_METHOD(tolower_ascii)
126180
{
127181
for (wchar_t ch = 0; ch < 128; ++ch)

0 commit comments

Comments
 (0)