Skip to content

Commit 08e40c1

Browse files
authored
Reapply "[MemProf] Change histogram storage from uint64_t to uint16_t… (#151431)
Reapply #147854 after fixes merged in #151398. Change memory access histogram storage from uint64_t to uint16_t to reduce profile size on disk. This change updates the raw profile format to v5. Also add a histogram test in compiler-rt since we didn't have one before. With this change, the raw memprof histogram profile for the basic test shrinks from 75KB to 20KB.
1 parent 70471f0 commit 08e40c1

29 files changed

+259
-32
lines changed

compiler-rt/include/profile/MemProfData.inc

Lines changed: 37 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,11 +33,10 @@
3333
(uint64_t)'o' << 24 | (uint64_t)'f' << 16 | (uint64_t)'r' << 8 | (uint64_t)129)
3434

3535
// The version number of the raw binary format.
36-
#define MEMPROF_RAW_VERSION 4ULL
36+
#define MEMPROF_RAW_VERSION 5ULL
3737

3838
// Currently supported versions.
39-
#define MEMPROF_RAW_SUPPORTED_VERSIONS \
40-
{ 3ULL, 4ULL }
39+
#define MEMPROF_RAW_SUPPORTED_VERSIONS {3ULL, 4ULL, 5ULL}
4140

4241
#define MEMPROF_V3_MIB_SIZE 132ULL;
4342

@@ -229,6 +228,41 @@ void Merge(const MemInfoBlock &newMIB) {
229228
} __attribute__((__packed__));
230229
#endif
231230

231+
// Histogram counts are stored as 16-bit scaled integers: the upper
// ExponentBits hold an exponent E, the lower MantissaBits hold a mantissa M,
// and the represented value is M << E.
constexpr int MantissaBits = 12;
constexpr int ExponentBits = 4;
static_assert(MantissaBits + ExponentBits == 16,
              "mantissa and exponent must exactly fill a uint16_t");
constexpr uint16_t MaxMantissa = (1U << MantissaBits) - 1;
constexpr uint16_t MaxExponent = (1U << ExponentBits) - 1;
// Largest count the encoding can hold; larger inputs saturate to this value.
constexpr uint64_t MaxRepresentableValue = static_cast<uint64_t>(MaxMantissa)
                                           << MaxExponent;

// Encodes a 64-bit unsigned integer into a 16-bit scaled integer format.
//
// Counts up to MaxMantissa are stored exactly (exponent 0). Larger counts are
// halved, rounding up at each step, until the mantissa fits; the number of
// halvings becomes the exponent. Counts above MaxRepresentableValue are
// clamped to MaxRepresentableValue before encoding.
inline uint16_t encodeHistogramCount(uint64_t Count) {
  if (Count > MaxRepresentableValue)
    Count = MaxRepresentableValue;

  // When Count already fits in the mantissa (including Count == 0) the loop
  // body never runs and the result is simply Count with a zero exponent.
  uint64_t M = Count;
  uint16_t E = 0;
  while (M > MaxMantissa) {
    // Repeated round-up halving is a ceiling division by 2^E, so the decoded
    // value is never smaller than the (clamped) input.
    M = (M + 1) >> 1;
    E++;
  }
  // The clamp above bounds E by MaxExponent, so the packed value fits in 16
  // bits; the cast makes the narrowing explicit.
  return static_cast<uint16_t>((E << MantissaBits) | M);
}

// Decodes a 16-bit scaled integer and returns the
// decoded 64-bit unsigned integer (mantissa shifted left by the exponent).
inline uint64_t decodeHistogramCount(uint16_t EncodedValue) {
  const uint16_t E = EncodedValue >> MantissaBits;
  const uint16_t M = EncodedValue & MaxMantissa;
  return static_cast<uint64_t>(M) << E;
}
265+
232266
} // namespace memprof
233267
} // namespace llvm
234268

compiler-rt/lib/memprof/memprof_rawprofile.cpp

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ using ::__sanitizer::Vector;
1919
using ::llvm::memprof::MemInfoBlock;
2020
using SegmentEntry = ::llvm::memprof::SegmentEntry;
2121
using Header = ::llvm::memprof::Header;
22+
using ::llvm::memprof::encodeHistogramCount;
2223

2324
namespace {
2425
template <class T> char *WriteBytes(const T &Pod, char *Buffer) {
@@ -169,13 +170,15 @@ void SerializeMIBInfoToBuffer(MIBMapTy &MIBMap, const Vector<u64> &StackIds,
169170
// FIXME: We unnecessarily serialize the AccessHistogram pointer. Adding a
170171
// serialization schema will fix this issue. See also FIXME in
171172
// deserialization.
172-
Ptr = WriteBytes((*h)->mib, Ptr);
173-
for (u64 j = 0; j < (*h)->mib.AccessHistogramSize; ++j) {
174-
u64 HistogramEntry = ((u64 *)((*h)->mib.AccessHistogram))[j];
173+
auto &MIB = (*h)->mib;
174+
Ptr = WriteBytes(MIB, Ptr);
175+
for (u64 j = 0; j < MIB.AccessHistogramSize; ++j) {
176+
u16 HistogramEntry =
177+
encodeHistogramCount(((u64 *)(MIB.AccessHistogram))[j]);
175178
Ptr = WriteBytes(HistogramEntry, Ptr);
176179
}
177-
if ((*h)->mib.AccessHistogramSize > 0) {
178-
InternalFree((void *)((*h)->mib.AccessHistogram));
180+
if (MIB.AccessHistogramSize > 0) {
181+
InternalFree((void *)MIB.AccessHistogram);
179182
}
180183
}
181184
CHECK(ExpectedNumBytes >= static_cast<u64>(Ptr - Buffer) &&
@@ -249,7 +252,7 @@ u64 SerializeToRawProfile(MIBMapTy &MIBMap, ArrayRef<LoadedModule> Modules,
249252
},
250253
reinterpret_cast<void *>(&TotalAccessHistogramEntries));
251254
const u64 NumHistogramBytes =
252-
RoundUpTo(TotalAccessHistogramEntries * sizeof(uint64_t), 8);
255+
RoundUpTo(TotalAccessHistogramEntries * sizeof(uint16_t), 8);
253256

254257
const u64 NumStackBytes = RoundUpTo(StackSizeBytes(StackIds), 8);
255258

compiler-rt/lib/memprof/tests/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ set(MEMPROF_SOURCES
2626
../memprof_rawprofile.cpp)
2727

2828
set(MEMPROF_UNITTESTS
29+
histogram_encoding.cpp
2930
rawprofile.cpp
3031
driver.cpp)
3132

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
#include <cstdint>
2+
#include <vector>
3+
4+
#include "profile/MemProfData.inc"
5+
#include "gtest/gtest.h"
6+
7+
namespace llvm {
8+
namespace memprof {
9+
namespace {
10+
TEST(MemProf, F16EncodeDecode) {
11+
const std::vector<uint64_t> TestCases = {
12+
0, 100, 4095, 4096, 5000, 8191, 65535, 1000000, 134213640, 200000000,
13+
};
14+
15+
for (const uint64_t TestCase : TestCases) {
16+
const uint16_t Encoded = encodeHistogramCount(TestCase);
17+
const uint64_t Decoded = decodeHistogramCount(Encoded);
18+
19+
const uint64_t MaxRepresentable = static_cast<uint64_t>(MaxMantissa)
20+
<< MaxExponent;
21+
22+
if (TestCase >= MaxRepresentable) {
23+
EXPECT_EQ(Decoded, MaxRepresentable);
24+
} else if (TestCase <= MaxMantissa) {
25+
EXPECT_EQ(Decoded, TestCase);
26+
} else {
27+
// The decoded value should be close to the original value.
28+
// The error should be less than 1/1024 for larger numbers.
29+
EXPECT_NEAR(Decoded, TestCase, static_cast<double>(TestCase) / 1024.0);
30+
}
31+
}
32+
}
33+
} // namespace
34+
} // namespace memprof
35+
} // namespace llvm

llvm/include/llvm/ProfileData/MemProfData.inc

Lines changed: 37 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,11 +33,10 @@
3333
(uint64_t)'o' << 24 | (uint64_t)'f' << 16 | (uint64_t)'r' << 8 | (uint64_t)129)
3434

3535
// The version number of the raw binary format.
36-
#define MEMPROF_RAW_VERSION 4ULL
36+
#define MEMPROF_RAW_VERSION 5ULL
3737

3838
// Currently supported versions.
39-
#define MEMPROF_RAW_SUPPORTED_VERSIONS \
40-
{ 3ULL, 4ULL }
39+
#define MEMPROF_RAW_SUPPORTED_VERSIONS {3ULL, 4ULL, 5ULL}
4140

4241
#define MEMPROF_V3_MIB_SIZE 132ULL;
4342

@@ -229,6 +228,41 @@ void Merge(const MemInfoBlock &newMIB) {
229228
} __attribute__((__packed__));
230229
#endif
231230

231+
// Histogram counts are stored as 16-bit scaled integers: the upper
// ExponentBits hold an exponent E, the lower MantissaBits hold a mantissa M,
// and the represented value is M << E.
constexpr int MantissaBits = 12;
constexpr int ExponentBits = 4;
static_assert(MantissaBits + ExponentBits == 16,
              "mantissa and exponent must exactly fill a uint16_t");
constexpr uint16_t MaxMantissa = (1U << MantissaBits) - 1;
constexpr uint16_t MaxExponent = (1U << ExponentBits) - 1;
// Largest count the encoding can hold; larger inputs saturate to this value.
constexpr uint64_t MaxRepresentableValue = static_cast<uint64_t>(MaxMantissa)
                                           << MaxExponent;

// Encodes a 64-bit unsigned integer into a 16-bit scaled integer format.
//
// Counts up to MaxMantissa are stored exactly (exponent 0). Larger counts are
// halved, rounding up at each step, until the mantissa fits; the number of
// halvings becomes the exponent. Counts above MaxRepresentableValue are
// clamped to MaxRepresentableValue before encoding.
inline uint16_t encodeHistogramCount(uint64_t Count) {
  if (Count > MaxRepresentableValue)
    Count = MaxRepresentableValue;

  // When Count already fits in the mantissa (including Count == 0) the loop
  // body never runs and the result is simply Count with a zero exponent.
  uint64_t M = Count;
  uint16_t E = 0;
  while (M > MaxMantissa) {
    // Repeated round-up halving is a ceiling division by 2^E, so the decoded
    // value is never smaller than the (clamped) input.
    M = (M + 1) >> 1;
    E++;
  }
  // The clamp above bounds E by MaxExponent, so the packed value fits in 16
  // bits; the cast makes the narrowing explicit.
  return static_cast<uint16_t>((E << MantissaBits) | M);
}

// Decodes a 16-bit scaled integer and returns the
// decoded 64-bit unsigned integer (mantissa shifted left by the exponent).
inline uint64_t decodeHistogramCount(uint16_t EncodedValue) {
  const uint16_t E = EncodedValue >> MantissaBits;
  const uint16_t M = EncodedValue & MaxMantissa;
  return static_cast<uint64_t>(M) << E;
}
265+
232266
} // namespace memprof
233267
} // namespace llvm
234268

llvm/lib/ProfileData/MemProfReader.cpp

Lines changed: 28 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ readMemInfoBlocksV3(const char *Ptr) {
135135
}
136136

137137
llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>>
138-
readMemInfoBlocksV4(const char *Ptr) {
138+
readMemInfoBlocksCommon(const char *Ptr, bool IsHistogramEncoded = false) {
139139
using namespace support;
140140

141141
const uint64_t NumItemsToRead =
@@ -145,27 +145,43 @@ readMemInfoBlocksV4(const char *Ptr) {
145145
for (uint64_t I = 0; I < NumItemsToRead; I++) {
146146
const uint64_t Id =
147147
endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);
148-
// We cheat a bit here and remove the const from cast to set the
149-
// Histogram Pointer to newly allocated buffer.
150-
MemInfoBlock MIB = *reinterpret_cast<const MemInfoBlock *>(Ptr);
151148

152-
// Only increment by size of MIB since readNext implicitly increments.
149+
MemInfoBlock MIB = *reinterpret_cast<const MemInfoBlock *>(Ptr);
153150
Ptr += sizeof(MemInfoBlock);
154151

155152
if (MIB.AccessHistogramSize > 0) {
153+
// The in-memory representation uses uint64_t for histogram entries.
156154
MIB.AccessHistogram =
157155
(uintptr_t)malloc(MIB.AccessHistogramSize * sizeof(uint64_t));
158-
}
159-
160-
for (uint64_t J = 0; J < MIB.AccessHistogramSize; J++) {
161-
((uint64_t *)MIB.AccessHistogram)[J] =
162-
endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);
156+
for (uint64_t J = 0; J < MIB.AccessHistogramSize; J++) {
157+
if (!IsHistogramEncoded) {
158+
((uint64_t *)MIB.AccessHistogram)[J] =
159+
endian::readNext<uint64_t, llvm::endianness::little, unaligned>(
160+
Ptr);
161+
} else {
162+
// The encoded on-disk format (V5 onwards) uses uint16_t.
163+
const uint16_t Val =
164+
endian::readNext<uint16_t, llvm::endianness::little, unaligned>(
165+
Ptr);
166+
((uint64_t *)MIB.AccessHistogram)[J] = decodeHistogramCount(Val);
167+
}
168+
}
163169
}
164170
Items.push_back({Id, MIB});
165171
}
166172
return Items;
167173
}
168174

175+
// Reads the MemInfoBlock section of a version 4 raw profile. Delegates to the
// shared reader with histogram decoding turned off, so each histogram entry is
// read as a plain uint64_t.
llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>>
readMemInfoBlocksV4(const char *Ptr) {
  return readMemInfoBlocksCommon(Ptr, /*IsHistogramEncoded=*/false);
}
179+
180+
// Reads the MemInfoBlock section of a version 5 raw profile. Delegates to the
// shared reader with histogram decoding turned on, so each histogram entry is
// read as an encoded uint16_t and expanded via decodeHistogramCount.
llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>>
readMemInfoBlocksV5(const char *Ptr) {
  constexpr bool IsHistogramEncoded = true;
  return readMemInfoBlocksCommon(Ptr, IsHistogramEncoded);
}
184+
169185
CallStackMap readStackInfo(const char *Ptr) {
170186
using namespace support;
171187

@@ -658,6 +674,8 @@ RawMemProfReader::readMemInfoBlocks(const char *Ptr) {
658674
return readMemInfoBlocksV3(Ptr);
659675
if (MemprofRawVersion == 4ULL)
660676
return readMemInfoBlocksV4(Ptr);
677+
if (MemprofRawVersion == 5ULL)
678+
return readMemInfoBlocksV5(Ptr);
661679
llvm_unreachable(
662680
"Panic: Unsupported version number when reading MemInfoBlocks");
663681
}
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.

0 commit comments

Comments
 (0)