From 74747d14bade85c631b685c77004ac2a32fd0899 Mon Sep 17 00:00:00 2001 From: Grigory Pastukhov Date: Thu, 24 Jul 2025 13:50:13 -0700 Subject: [PATCH 1/7] Change DwarfUnit field to vector in BinaryFunction --- bolt/include/bolt/Core/BinaryFunction.h | 27 ++++++--- bolt/lib/Core/BinaryContext.cpp | 38 +++++++----- bolt/lib/Core/BinaryEmitter.cpp | 80 ++++++++++++++----------- bolt/lib/Core/BinaryFunction.cpp | 13 +++- 4 files changed, 95 insertions(+), 63 deletions(-) diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h index ae580520b9110..966559e0c6fa6 100644 --- a/bolt/include/bolt/Core/BinaryFunction.h +++ b/bolt/include/bolt/Core/BinaryFunction.h @@ -423,8 +423,8 @@ class BinaryFunction { /// Original LSDA type encoding unsigned LSDATypeEncoding{dwarf::DW_EH_PE_omit}; - /// Containing compilation unit for the function. - DWARFUnit *DwarfUnit{nullptr}; + /// All compilation units this function belongs to. + SmallVector DwarfUnitVec; /// Last computed hash value. Note that the value could be recomputed using /// different parameters by every pass. @@ -2414,15 +2414,24 @@ class BinaryFunction { void computeBlockHashes(HashFunction HashFunction = HashFunction::Default) const; - void setDWARFUnit(DWARFUnit *Unit) { DwarfUnit = Unit; } + void addDWARFUnit(DWARFUnit *Unit) { DwarfUnitVec.push_back(Unit); } - /// Return DWARF compile unit for this function. - DWARFUnit *getDWARFUnit() const { return DwarfUnit; } + void removeDWARFUnit(DWARFUnit *Unit) { + auto *It = std::find(DwarfUnitVec.begin(), DwarfUnitVec.end(), Unit); + // If found, erase it + if (It != DwarfUnitVec.end()) { + DwarfUnitVec.erase(It); + } + } + + /// Return DWARF compile units for this function. + const SmallVector getDWARFUnits() const { + return DwarfUnitVec; + } - /// Return line info table for this function. - const DWARFDebugLine::LineTable *getDWARFLineTable() const { - return getDWARFUnit() ? 
BC.DwCtx->getLineTableForUnit(getDWARFUnit()) - : nullptr; + const DWARFDebugLine::LineTable * + getDWARFLineTableForUnit(DWARFUnit *Unit) const { + return BC.DwCtx->getLineTableForUnit(Unit); } /// Finalize profile for the function. diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp index 84f1853469709..c58d99a77f8b3 100644 --- a/bolt/lib/Core/BinaryContext.cpp +++ b/bolt/lib/Core/BinaryContext.cpp @@ -1697,22 +1697,35 @@ void BinaryContext::preprocessDebugInfo() { auto It = llvm::partition_point( AllRanges, [=](CURange R) { return R.HighPC <= FunctionAddress; }); - if (It != AllRanges.end() && It->LowPC <= FunctionAddress) - Function.setDWARFUnit(It->Unit); + if (It == AllRanges.end() || It->LowPC > FunctionAddress) { + continue; + } + Function.addDWARFUnit(It->Unit); + + // Go forward and add all units from ranges that cover the function + while (++It != AllRanges.end()) { + if (It->LowPC <= FunctionAddress && FunctionAddress < It->HighPC) { + Function.addDWARFUnit(It->Unit); + } else { + break; + } + } } // Discover units with debug info that needs to be updated. for (const auto &KV : BinaryFunctions) { const BinaryFunction &BF = KV.second; - if (shouldEmit(BF) && BF.getDWARFUnit()) - ProcessedCUs.insert(BF.getDWARFUnit()); + if (shouldEmit(BF) && !BF.getDWARFUnits().empty()) + for (const DWARFUnit *Unit : BF.getDWARFUnits()) + ProcessedCUs.insert(Unit); } - // Clear debug info for functions from units that we are not going to process. 
for (auto &KV : BinaryFunctions) { BinaryFunction &BF = KV.second; - if (BF.getDWARFUnit() && !ProcessedCUs.count(BF.getDWARFUnit())) - BF.setDWARFUnit(nullptr); + for (auto *Unit : BF.getDWARFUnits()) { + if (!ProcessedCUs.count(Unit)) + BF.removeDWARFUnit(Unit); + } } if (opts::Verbosity >= 1) { @@ -1912,14 +1925,9 @@ static void printDebugInfo(raw_ostream &OS, const MCInst &Instruction, if (RowRef == DebugLineTableRowRef::NULL_ROW) return; - const DWARFDebugLine::LineTable *LineTable; - if (Function && Function->getDWARFUnit() && - Function->getDWARFUnit()->getOffset() == RowRef.DwCompileUnitIndex) { - LineTable = Function->getDWARFLineTable(); - } else { - LineTable = DwCtx->getLineTableForUnit( - DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex)); - } + const DWARFDebugLine::LineTable *LineTable = DwCtx->getLineTableForUnit( + DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex)); + assert(LineTable && "line table expected for instruction with debug info"); const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1]; diff --git a/bolt/lib/Core/BinaryEmitter.cpp b/bolt/lib/Core/BinaryEmitter.cpp index 7b5cd276fee89..34bda7403d259 100644 --- a/bolt/lib/Core/BinaryEmitter.cpp +++ b/bolt/lib/Core/BinaryEmitter.cpp @@ -177,7 +177,8 @@ class BinaryEmitter { /// Note that it does not automatically result in the insertion of the EOS /// marker in the line table program, but provides one to the DWARF generator /// when it needs it. - void emitLineInfoEnd(const BinaryFunction &BF, MCSymbol *FunctionEndSymbol); + void emitLineInfoEnd(const BinaryFunction &BF, MCSymbol *FunctionEndSymbol, + DWARFUnit *Unit); /// Emit debug line info for unprocessed functions from CUs that include /// emitted functions. 
@@ -436,8 +437,9 @@ bool BinaryEmitter::emitFunction(BinaryFunction &Function, Streamer.emitELFSize(StartSymbol, SizeExpr); } - if (opts::UpdateDebugSections && Function.getDWARFUnit()) - emitLineInfoEnd(Function, EndSymbol); + // TODO: Emit line info end for all the CUs that contain the function. + if (opts::UpdateDebugSections && !Function.getDWARFUnits().empty()) + emitLineInfoEnd(Function, EndSymbol, Function.getDWARFUnits().front()); // Exception handling info for the function. emitLSDA(Function, FF); @@ -486,7 +488,7 @@ void BinaryEmitter::emitFunctionBody(BinaryFunction &BF, FunctionFragment &FF, // A symbol to be emitted before the instruction to mark its location. MCSymbol *InstrLabel = BC.MIB->getInstLabel(Instr); - if (opts::UpdateDebugSections && BF.getDWARFUnit()) { + if (opts::UpdateDebugSections && !BF.getDWARFUnits().empty()) { LastLocSeen = emitLineInfo(BF, Instr.getLoc(), LastLocSeen, FirstInstr, InstrLabel); FirstInstr = false; @@ -679,8 +681,10 @@ void BinaryEmitter::emitConstantIslands(BinaryFunction &BF, bool EmitColdPart, SMLoc BinaryEmitter::emitLineInfo(const BinaryFunction &BF, SMLoc NewLoc, SMLoc PrevLoc, bool FirstInstr, MCSymbol *&InstrLabel) { - DWARFUnit *FunctionCU = BF.getDWARFUnit(); - const DWARFDebugLine::LineTable *FunctionLineTable = BF.getDWARFLineTable(); + // TODO: implment emitting into line tables corresponding to multiple CUs + DWARFUnit *FunctionCU = BF.getDWARFUnits().front(); + const DWARFDebugLine::LineTable *FunctionLineTable = + BF.getDWARFLineTableForUnit(FunctionCU); assert(FunctionCU && "cannot emit line info for function without CU"); DebugLineTableRowRef RowReference = DebugLineTableRowRef::fromSMLoc(NewLoc); @@ -740,13 +744,13 @@ SMLoc BinaryEmitter::emitLineInfo(const BinaryFunction &BF, SMLoc NewLoc, } void BinaryEmitter::emitLineInfoEnd(const BinaryFunction &BF, - MCSymbol *FunctionEndLabel) { - DWARFUnit *FunctionCU = BF.getDWARFUnit(); - assert(FunctionCU && "DWARF unit expected"); + MCSymbol 
*FunctionEndLabel, + DWARFUnit *Unit) { + assert(Unit && "DWARF unit expected"); BC.Ctx->setCurrentDwarfLoc(0, 0, 0, DWARF2_FLAG_END_SEQUENCE, 0, 0); const MCDwarfLoc &DwarfLoc = BC.Ctx->getCurrentDwarfLoc(); BC.Ctx->clearDwarfLocSeen(); - BC.getDwarfLineTable(FunctionCU->getOffset()) + BC.getDwarfLineTable(Unit->getOffset()) .getMCLineSections() .addLineEntry(MCDwarfLineEntry(FunctionEndLabel, DwarfLoc), Streamer.getCurrentSectionOnly()); @@ -1115,36 +1119,40 @@ void BinaryEmitter::emitDebugLineInfoForOriginalFunctions() { if (Function.isEmitted()) continue; - const DWARFDebugLine::LineTable *LineTable = Function.getDWARFLineTable(); - if (!LineTable) - continue; // nothing to update for this function + // Loop through all CUs in the function + for (DWARFUnit *Unit : Function.getDWARFUnits()) { + const DWARFDebugLine::LineTable *LineTable = + Function.getDWARFLineTableForUnit(Unit); + if (!LineTable) + continue; // nothing to update for this unit + + const uint64_t Address = Function.getAddress(); + std::vector Results; + if (!LineTable->lookupAddressRange( + {Address, object::SectionedAddress::UndefSection}, + Function.getSize(), Results)) + continue; - const uint64_t Address = Function.getAddress(); - std::vector Results; - if (!LineTable->lookupAddressRange( - {Address, object::SectionedAddress::UndefSection}, - Function.getSize(), Results)) - continue; + if (Results.empty()) + continue; - if (Results.empty()) - continue; + // The first row returned could be the last row matching the start + // address. Find the first row with the same address that is not the end + // of the sequence. + uint64_t FirstRow = Results.front(); + while (FirstRow > 0) { + const DWARFDebugLine::Row &PrevRow = LineTable->Rows[FirstRow - 1]; + if (PrevRow.Address.Address != Address || PrevRow.EndSequence) + break; + --FirstRow; + } - // The first row returned could be the last row matching the start address. 
- // Find the first row with the same address that is not the end of the - // sequence. - uint64_t FirstRow = Results.front(); - while (FirstRow > 0) { - const DWARFDebugLine::Row &PrevRow = LineTable->Rows[FirstRow - 1]; - if (PrevRow.Address.Address != Address || PrevRow.EndSequence) - break; - --FirstRow; + const uint64_t EndOfSequenceAddress = + Function.getAddress() + Function.getMaxSize(); + BC.getDwarfLineTable(Unit->getOffset()) + .addLineTableSequence(LineTable, FirstRow, Results.back(), + EndOfSequenceAddress); } - - const uint64_t EndOfSequenceAddress = - Function.getAddress() + Function.getMaxSize(); - BC.getDwarfLineTable(Function.getDWARFUnit()->getOffset()) - .addLineTableSequence(LineTable, FirstRow, Results.back(), - EndOfSequenceAddress); } // For units that are completely unprocessed, use original debug line contents diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp index eec68ff5a5fce..bbe04a17c0ad3 100644 --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -1496,9 +1496,16 @@ Error BinaryFunction::disassemble() { } add_instruction: - if (getDWARFLineTable()) { - Instruction.setLoc(findDebugLineInformationForInstructionAt( - AbsoluteInstrAddr, getDWARFUnit(), getDWARFLineTable())); + // TODO: Handle multiple DWARF compilation units properly. + // For now, use the first unit if available. + if (!getDWARFUnits().empty()) { + DWARFUnit *FirstUnit = getDWARFUnits().front(); + const DWARFDebugLine::LineTable *LineTable = + getDWARFLineTableForUnit(FirstUnit); + if (LineTable) { + Instruction.setLoc(findDebugLineInformationForInstructionAt( + AbsoluteInstrAddr, FirstUnit, LineTable)); + } } // Record offset of the instruction for profile matching. 
From 885937d5529f3ba2b047d8658ed85aa34b515ffe Mon Sep 17 00:00:00 2001 From: Grigory Pastukhov Date: Thu, 24 Jul 2025 20:30:10 -0700 Subject: [PATCH 2/7] Implemented multiple rows per instruction logic --- bolt/include/bolt/Core/BinaryContext.h | 6 + bolt/include/bolt/Core/DebugData.h | 116 +++++++++++++---- bolt/lib/Core/BinaryContext.cpp | 27 ++-- bolt/lib/Core/BinaryEmitter.cpp | 136 ++++++++++++-------- bolt/lib/Core/BinaryFunction.cpp | 58 ++++----- bolt/lib/Core/DebugData.cpp | 2 - bolt/test/Inputs/multi-cu-common.h | 10 ++ bolt/test/Inputs/multi-cu-file1.c | 9 ++ bolt/test/Inputs/multi-cu-file2.c | 8 ++ bolt/test/Inputs/process-debug-line.sh | 101 +++++++++++++++ bolt/test/X86/multi-cu-debug-line.test | 108 ++++++++++++++++ bolt/test/perf2bolt/Inputs/perf_test.lds | 11 +- bolt/unittests/Core/CMakeLists.txt | 1 + bolt/unittests/Core/ClusteredRows.cpp | 152 +++++++++++++++++++++++ 14 files changed, 621 insertions(+), 124 deletions(-) create mode 100644 bolt/test/Inputs/multi-cu-common.h create mode 100644 bolt/test/Inputs/multi-cu-file1.c create mode 100644 bolt/test/Inputs/multi-cu-file2.c create mode 100755 bolt/test/Inputs/process-debug-line.sh create mode 100644 bolt/test/X86/multi-cu-debug-line.test create mode 100644 bolt/unittests/Core/ClusteredRows.cpp diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h index 91ecf89da618c..48bc9a5d1f92c 100644 --- a/bolt/include/bolt/Core/BinaryContext.h +++ b/bolt/include/bolt/Core/BinaryContext.h @@ -288,6 +288,12 @@ class BinaryContext { /// overwritten, but it is okay to re-generate debug info for them. std::set ProcessedCUs; + /// DWARF-related container to manage lifecycle of groups of rows from line + /// tables associated with instructions. Since binary functions can span + /// multiple compilation units, instructions may reference debug line + /// information from multiple CUs. 
+ ClasteredRowsContainer ClasteredRows; + // Setup MCPlus target builder void initializeTarget(std::unique_ptr TargetBuilder) { MIB = std::move(TargetBuilder); diff --git a/bolt/include/bolt/Core/DebugData.h b/bolt/include/bolt/Core/DebugData.h index 6ea3b1af1024f..048594946d8a9 100644 --- a/bolt/include/bolt/Core/DebugData.h +++ b/bolt/include/bolt/Core/DebugData.h @@ -135,8 +135,6 @@ struct DebugLineTableRowRef { uint32_t DwCompileUnitIndex; uint32_t RowIndex; - const static DebugLineTableRowRef NULL_ROW; - bool operator==(const DebugLineTableRowRef &Rhs) const { return DwCompileUnitIndex == Rhs.DwCompileUnitIndex && RowIndex == Rhs.RowIndex; @@ -145,24 +143,6 @@ struct DebugLineTableRowRef { bool operator!=(const DebugLineTableRowRef &Rhs) const { return !(*this == Rhs); } - - static DebugLineTableRowRef fromSMLoc(const SMLoc &Loc) { - union { - decltype(Loc.getPointer()) Ptr; - DebugLineTableRowRef Ref; - } U; - U.Ptr = Loc.getPointer(); - return U.Ref; - } - - SMLoc toSMLoc() const { - union { - decltype(SMLoc().getPointer()) Ptr; - DebugLineTableRowRef Ref; - } U; - U.Ref = *this; - return SMLoc::getFromPointer(U.Ptr); - } }; /// Common buffer vector used for debug info handling. @@ -210,7 +190,7 @@ class DebugRangesSectionWriter { static bool classof(const DebugRangesSectionWriter *Writer) { return Writer->getKind() == RangesWriterKind::DebugRangesWriter; } - + /// Append a range to the main buffer. void appendToRangeBuffer(const DebugBufferVector &CUBuffer); @@ -852,6 +832,100 @@ class DwarfLineTable { // Returns DWARF Version for this line table. uint16_t getDwarfVersion() const { return DwarfVersion; } }; + +/// ClusteredRows represents a collection of debug line table row references. +/// Since a Binary function can belong to multiple compilation units (CUs), +/// a single MCInst can have multiple debug line table rows associated with it +/// from different CUs. This class manages such clustered row references. 
+/// +/// MEMORY LAYOUT AND DESIGN: +/// This class uses a flexible array member pattern to store all +/// DebugLineTableRowRef elements in a single contiguous memory allocation. +/// The memory layout is: +/// +/// +------------------+ +/// | ClusteredRows | <- Object header (Size + first element) +/// | - Size | +/// | - Raws (element) | <- First DebugLineTableRowRef element +/// +------------------+ +/// | element[1] | <- Additional DebugLineTableRowRef elements +/// | element[2] | stored immediately after the object +/// | ... | +/// | element[Size-1] | +/// +------------------+ +/// +/// PERFORMANCE BENEFITS: +/// - Single memory allocation: All elements are stored in one contiguous block, +/// eliminating the need for separate heap allocations for the array. +/// - No extra dereferencing: Elements are accessed directly via pointer +/// arithmetic (beginPtr() + offset) rather than through an additional +/// pointer indirection. +/// - Cache locality: All elements are guaranteed to be adjacent in memory, +/// improving cache performance during iteration. +/// - Memory efficiency: No overhead from separate pointer storage or +/// fragmented allocations. +/// +/// The 'Raws' member serves as both the first element storage and the base +/// address for pointer arithmetic to access subsequent elements. 
+class ClusteredRows { +public: + ArrayRef getRows() const { + return ArrayRef(beginPtrConst(), Size); + } + uint64_t size() const { return Size; } + static const ClusteredRows *fromSMLoc(const SMLoc &Loc) { + return reinterpret_cast(Loc.getPointer()); + } + SMLoc toSMLoc() const { + return SMLoc::getFromPointer(reinterpret_cast(this)); + } + + template void populate(const T Vec) { + assert(Vec.size() == Size && ""); + DebugLineTableRowRef *CurRawPtr = beginPtr(); + for (DebugLineTableRowRef RowRef : Vec) { + *CurRawPtr = RowRef; + ++CurRawPtr; + } + } + +private: + uint64_t Size; + DebugLineTableRowRef Raws; + + ClusteredRows(uint64_t Size) : Size(Size) {} + static uint64_t getTotalSize(uint64_t Size) { + assert(Size > 0 && "Size must be greater than 0"); + return sizeof(ClusteredRows) + (Size - 1) * sizeof(DebugLineTableRowRef); + } + const DebugLineTableRowRef *beginPtrConst() const { + return reinterpret_cast(&Raws); + } + DebugLineTableRowRef *beginPtr() { + return reinterpret_cast(&Raws); + } + + friend class ClasteredRowsContainer; +}; + +/// ClasteredRowsContainer manages the lifecycle of ClusteredRows objects. 
+class ClasteredRowsContainer { +public: + ClusteredRows *createClusteredRows(uint64_t Size) { + auto *CR = new (std::malloc(ClusteredRows::getTotalSize(Size))) + ClusteredRows(Size); + Clusters.push_back(CR); + return CR; + } + ~ClasteredRowsContainer() { + for (auto *CR : Clusters) + std::free(CR); + } + +private: + std::vector Clusters; +}; + } // namespace bolt } // namespace llvm diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp index c58d99a77f8b3..1766b0540a5cd 100644 --- a/bolt/lib/Core/BinaryContext.cpp +++ b/bolt/lib/Core/BinaryContext.cpp @@ -1568,23 +1568,21 @@ unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID, DWARFCompileUnit *SrcUnit = DwCtx->getCompileUnitForOffset(SrcCUID); const DWARFDebugLine::LineTable *LineTable = DwCtx->getLineTableForUnit(SrcUnit); - const std::vector &FileNames = - LineTable->Prologue.FileNames; + const DWARFDebugLine::FileNameEntry &FileNameEntry = + LineTable->Prologue.getFileNameEntry(FileIndex); // Dir indexes start at 1, as DWARF file numbers, and a dir index 0 // means empty dir. 
- assert(FileIndex > 0 && FileIndex <= FileNames.size() && - "FileIndex out of range for the compilation unit."); StringRef Dir = ""; - if (FileNames[FileIndex - 1].DirIdx != 0) { + if (FileNameEntry.DirIdx != 0) { if (std::optional DirName = dwarf::toString( LineTable->Prologue - .IncludeDirectories[FileNames[FileIndex - 1].DirIdx - 1])) { + .IncludeDirectories[FileNameEntry.DirIdx - 1])) { Dir = *DirName; } } StringRef FileName = ""; if (std::optional FName = - dwarf::toString(FileNames[FileIndex - 1].Name)) + dwarf::toString(FileNameEntry.Name)) FileName = *FName; assert(FileName != ""); DWARFCompileUnit *DstUnit = DwCtx->getCompileUnitForOffset(DestCUID); @@ -1920,20 +1918,25 @@ bool BinaryContext::isMarker(const SymbolRef &Symbol) const { static void printDebugInfo(raw_ostream &OS, const MCInst &Instruction, const BinaryFunction *Function, DWARFContext *DwCtx) { - DebugLineTableRowRef RowRef = - DebugLineTableRowRef::fromSMLoc(Instruction.getLoc()); - if (RowRef == DebugLineTableRowRef::NULL_ROW) + const ClusteredRows *LineTableRows = + ClusteredRows::fromSMLoc(Instruction.getLoc()); + if (LineTableRows == nullptr) return; + // File name and line number should be the same for all CUs. + // So it is sufficient to check the first one. 
+ DebugLineTableRowRef RowRef = LineTableRows->getRows().front(); const DWARFDebugLine::LineTable *LineTable = DwCtx->getLineTableForUnit( DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex)); - assert(LineTable && "line table expected for instruction with debug info"); + if (!LineTable) + return; const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1]; StringRef FileName = ""; + if (std::optional FName = - dwarf::toString(LineTable->Prologue.FileNames[Row.File - 1].Name)) + dwarf::toString(LineTable->Prologue.getFileNameEntry(Row.File).Name)) FileName = *FName; OS << " # debug line " << FileName << ":" << Row.Line; if (Row.Column) diff --git a/bolt/lib/Core/BinaryEmitter.cpp b/bolt/lib/Core/BinaryEmitter.cpp index 34bda7403d259..8862f0680cb7e 100644 --- a/bolt/lib/Core/BinaryEmitter.cpp +++ b/bolt/lib/Core/BinaryEmitter.cpp @@ -437,9 +437,9 @@ bool BinaryEmitter::emitFunction(BinaryFunction &Function, Streamer.emitELFSize(StartSymbol, SizeExpr); } - // TODO: Emit line info end for all the CUs that contain the function. if (opts::UpdateDebugSections && !Function.getDWARFUnits().empty()) - emitLineInfoEnd(Function, EndSymbol, Function.getDWARFUnits().front()); + for (DWARFUnit *Unit : Function.getDWARFUnits()) + emitLineInfoEnd(Function, EndSymbol, Unit); // Exception handling info for the function. 
emitLSDA(Function, FF); @@ -681,64 +681,92 @@ void BinaryEmitter::emitConstantIslands(BinaryFunction &BF, bool EmitColdPart, SMLoc BinaryEmitter::emitLineInfo(const BinaryFunction &BF, SMLoc NewLoc, SMLoc PrevLoc, bool FirstInstr, MCSymbol *&InstrLabel) { - // TODO: implment emitting into line tables corresponding to multiple CUs - DWARFUnit *FunctionCU = BF.getDWARFUnits().front(); - const DWARFDebugLine::LineTable *FunctionLineTable = - BF.getDWARFLineTableForUnit(FunctionCU); - assert(FunctionCU && "cannot emit line info for function without CU"); - - DebugLineTableRowRef RowReference = DebugLineTableRowRef::fromSMLoc(NewLoc); - - // Check if no new line info needs to be emitted. - if (RowReference == DebugLineTableRowRef::NULL_ROW || + if (NewLoc.getPointer() == nullptr || NewLoc.getPointer() == PrevLoc.getPointer()) return PrevLoc; + const ClusteredRows *Cluster = ClusteredRows::fromSMLoc(NewLoc); + + auto addToLineTable = [&](DebugLineTableRowRef RowReference, + const DWARFUnit *TargetCU, unsigned Flags, + MCSymbol *InstrLabel, + const DWARFDebugLine::Row &CurrentRow) { + const uint64_t TargetUnitIndex = TargetCU->getOffset(); + unsigned TargetFilenum = CurrentRow.File; + const uint32_t CurrentUnitIndex = RowReference.DwCompileUnitIndex; + // If the CU id from the current instruction location does not + // match the target CU id, it means that we have come across some + // inlined code (by BOLT). We must look up the CU for the instruction's + // original function and get the line table from that. + if (TargetUnitIndex != CurrentUnitIndex) { + // Add filename from the inlined function to the current CU. 
+ TargetFilenum = BC.addDebugFilenameToUnit( + TargetUnitIndex, CurrentUnitIndex, CurrentRow.File); + } + BC.Ctx->setCurrentDwarfLoc(TargetFilenum, CurrentRow.Line, + CurrentRow.Column, Flags, CurrentRow.Isa, + CurrentRow.Discriminator); + const MCDwarfLoc &DwarfLoc = BC.Ctx->getCurrentDwarfLoc(); + BC.Ctx->clearDwarfLocSeen(); + auto &MapLineEntries = BC.getDwarfLineTable(TargetUnitIndex) + .getMCLineSections() + .getMCLineEntries(); + const auto *It = MapLineEntries.find(Streamer.getCurrentSectionOnly()); + auto NewLineEntry = MCDwarfLineEntry(InstrLabel, DwarfLoc); + + // Check if line table exists and has entries before doing comparison + if (It != MapLineEntries.end() && !It->second.empty()) { + // Check if the new line entry has the same debug info as the last one + // to avoid duplicates. We don't compare labels since different + // instructions can have the same line info. + const auto &LastEntry = It->second.back(); + if (LastEntry.getFileNum() == NewLineEntry.getFileNum() && + LastEntry.getLine() == NewLineEntry.getLine() && + LastEntry.getColumn() == NewLineEntry.getColumn() && + LastEntry.getFlags() == NewLineEntry.getFlags() && + LastEntry.getIsa() == NewLineEntry.getIsa() && + LastEntry.getDiscriminator() == NewLineEntry.getDiscriminator()) + return; + } - unsigned CurrentFilenum = 0; - const DWARFDebugLine::LineTable *CurrentLineTable = FunctionLineTable; - - // If the CU id from the current instruction location does not - // match the CU id from the current function, it means that we - // have come across some inlined code. We must look up the CU - // for the instruction's original function and get the line table - // from that. 
- const uint64_t FunctionUnitIndex = FunctionCU->getOffset(); - const uint32_t CurrentUnitIndex = RowReference.DwCompileUnitIndex; - if (CurrentUnitIndex != FunctionUnitIndex) { - CurrentLineTable = BC.DwCtx->getLineTableForUnit( - BC.DwCtx->getCompileUnitForOffset(CurrentUnitIndex)); - // Add filename from the inlined function to the current CU. - CurrentFilenum = BC.addDebugFilenameToUnit( - FunctionUnitIndex, CurrentUnitIndex, - CurrentLineTable->Rows[RowReference.RowIndex - 1].File); - } - - const DWARFDebugLine::Row &CurrentRow = - CurrentLineTable->Rows[RowReference.RowIndex - 1]; - if (!CurrentFilenum) - CurrentFilenum = CurrentRow.File; - - unsigned Flags = (DWARF2_FLAG_IS_STMT * CurrentRow.IsStmt) | - (DWARF2_FLAG_BASIC_BLOCK * CurrentRow.BasicBlock) | - (DWARF2_FLAG_PROLOGUE_END * CurrentRow.PrologueEnd) | - (DWARF2_FLAG_EPILOGUE_BEGIN * CurrentRow.EpilogueBegin); - - // Always emit is_stmt at the beginning of function fragment. - if (FirstInstr) - Flags |= DWARF2_FLAG_IS_STMT; - - BC.Ctx->setCurrentDwarfLoc(CurrentFilenum, CurrentRow.Line, CurrentRow.Column, - Flags, CurrentRow.Isa, CurrentRow.Discriminator); - const MCDwarfLoc &DwarfLoc = BC.Ctx->getCurrentDwarfLoc(); - BC.Ctx->clearDwarfLocSeen(); + BC.getDwarfLineTable(TargetUnitIndex) + .getMCLineSections() + .addLineEntry(NewLineEntry, Streamer.getCurrentSectionOnly()); + }; if (!InstrLabel) InstrLabel = BC.Ctx->createTempSymbol(); - - BC.getDwarfLineTable(FunctionUnitIndex) - .getMCLineSections() - .addLineEntry(MCDwarfLineEntry(InstrLabel, DwarfLoc), - Streamer.getCurrentSectionOnly()); + for (DebugLineTableRowRef RowReference : Cluster->getRows()) { + const DWARFDebugLine::LineTable *CurrentLineTable = + BC.DwCtx->getLineTableForUnit( + BC.DwCtx->getCompileUnitForOffset(RowReference.DwCompileUnitIndex)); + const DWARFDebugLine::Row &CurrentRow = + CurrentLineTable->Rows[RowReference.RowIndex - 1]; + unsigned Flags = (DWARF2_FLAG_IS_STMT * CurrentRow.IsStmt) | + (DWARF2_FLAG_BASIC_BLOCK * 
CurrentRow.BasicBlock) | + (DWARF2_FLAG_PROLOGUE_END * CurrentRow.PrologueEnd) | + (DWARF2_FLAG_EPILOGUE_BEGIN * CurrentRow.EpilogueBegin); + + // Always emit is_stmt at the beginning of function fragment. + if (FirstInstr) + Flags |= DWARF2_FLAG_IS_STMT; + const auto &FunctionDwarfUnits = BF.getDWARFUnits(); + const auto *It = std::find_if( + FunctionDwarfUnits.begin(), FunctionDwarfUnits.end(), + [RowReference](const DWARFUnit *Unit) { + return Unit->getOffset() == RowReference.DwCompileUnitIndex; + }); + if (It != FunctionDwarfUnits.end()) { + addToLineTable(RowReference, *It, Flags, InstrLabel, CurrentRow); + continue; + } + // This rows is from CU that did not contain the original function. + // This might happen if BOLT moved/inlined that instruction from other CUs. + // In this case, we need to insert it to all CUs that the function + // originally beloned to. + for (const DWARFUnit *Unit : BF.getDWARFUnits()) { + addToLineTable(RowReference, Unit, Flags, InstrLabel, CurrentRow); + } + } return NewLoc; } diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp index bbe04a17c0ad3..a3a6b31451441 100644 --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -179,37 +179,29 @@ template static bool emptyRange(const R &Range) { } /// Gets debug line information for the instruction located at the given -/// address in the original binary. The SMLoc's pointer is used -/// to point to this information, which is represented by a -/// DebugLineTableRowRef. The returned pointer is null if no debug line -/// information for this instruction was found. -static SMLoc findDebugLineInformationForInstructionAt( +/// address in the original binary. Returns an optional DebugLineTableRowRef +/// that references the corresponding row in the DWARF line table. 
Since binary +/// functions can span multiple compilation units, this function helps +/// associate instructions with their debug line information from the +/// appropriate CU. Returns std::nullopt if no debug line information for +/// this instruction was found. +static std::optional +findDebugLineInformationForInstructionAt( uint64_t Address, DWARFUnit *Unit, const DWARFDebugLine::LineTable *LineTable) { - // We use the pointer in SMLoc to store an instance of DebugLineTableRowRef, - // which occupies 64 bits. Thus, we can only proceed if the struct fits into - // the pointer itself. - static_assert( - sizeof(decltype(SMLoc().getPointer())) >= sizeof(DebugLineTableRowRef), - "Cannot fit instruction debug line information into SMLoc's pointer"); - - SMLoc NullResult = DebugLineTableRowRef::NULL_ROW.toSMLoc(); uint32_t RowIndex = LineTable->lookupAddress( {Address, object::SectionedAddress::UndefSection}); if (RowIndex == LineTable->UnknownRowIndex) - return NullResult; + return std::nullopt; assert(RowIndex < LineTable->Rows.size() && "Line Table lookup returned invalid index."); - decltype(SMLoc().getPointer()) Ptr; - DebugLineTableRowRef *InstructionLocation = - reinterpret_cast(&Ptr); - - InstructionLocation->DwCompileUnitIndex = Unit->getOffset(); - InstructionLocation->RowIndex = RowIndex + 1; + DebugLineTableRowRef InstructionLocation; + InstructionLocation.DwCompileUnitIndex = Unit->getOffset(); + InstructionLocation.RowIndex = RowIndex + 1; - return SMLoc::getFromPointer(Ptr); + return InstructionLocation; } static std::string buildSectionName(StringRef Prefix, StringRef Name, @@ -1496,15 +1488,23 @@ Error BinaryFunction::disassemble() { } add_instruction: - // TODO: Handle multiple DWARF compilation units properly. - // For now, use the first unit if available. 
if (!getDWARFUnits().empty()) { - DWARFUnit *FirstUnit = getDWARFUnits().front(); - const DWARFDebugLine::LineTable *LineTable = - getDWARFLineTableForUnit(FirstUnit); - if (LineTable) { - Instruction.setLoc(findDebugLineInformationForInstructionAt( - AbsoluteInstrAddr, FirstUnit, LineTable)); + SmallVector Rows; + for (DWARFUnit *Unit : getDWARFUnits()) { + const DWARFDebugLine::LineTable *LineTable = + getDWARFLineTableForUnit(Unit); + if (!LineTable) + continue; + if (std::optional RowRef = + findDebugLineInformationForInstructionAt(AbsoluteInstrAddr, + Unit, LineTable)) + Rows.emplace_back(*RowRef); + } + if (!Rows.empty()) { + ClusteredRows *Cluster = + BC.ClasteredRows.createClusteredRows(Rows.size()); + Cluster->populate(Rows); + Instruction.setLoc(Cluster->toSMLoc()); } } diff --git a/bolt/lib/Core/DebugData.cpp b/bolt/lib/Core/DebugData.cpp index 521eb8d91bbc0..e05f28f08572c 100644 --- a/bolt/lib/Core/DebugData.cpp +++ b/bolt/lib/Core/DebugData.cpp @@ -101,8 +101,6 @@ std::optional findAttributeInfo(const DWARFDie DIE, return findAttributeInfo(DIE, AbbrevDecl, *Index); } -const DebugLineTableRowRef DebugLineTableRowRef::NULL_ROW{0, 0}; - LLVM_ATTRIBUTE_UNUSED static void printLE64(const std::string &S) { for (uint32_t I = 0, Size = S.size(); I < Size; ++I) { diff --git a/bolt/test/Inputs/multi-cu-common.h b/bolt/test/Inputs/multi-cu-common.h new file mode 100644 index 0000000000000..aeb8076305dce --- /dev/null +++ b/bolt/test/Inputs/multi-cu-common.h @@ -0,0 +1,10 @@ +#ifndef MULTI_CU_COMMON_H +#define MULTI_CU_COMMON_H + +static inline int common_inline_function(int x) { + int result = x * 2; + result += 10; + return result; +} + +#endif // MULTI_CU_COMMON_H diff --git a/bolt/test/Inputs/multi-cu-file1.c b/bolt/test/Inputs/multi-cu-file1.c new file mode 100644 index 0000000000000..f3528b2acddb8 --- /dev/null +++ b/bolt/test/Inputs/multi-cu-file1.c @@ -0,0 +1,9 @@ +#include "multi-cu-common.h" +#include + +int main() { + int value = 5; + int result = 
common_inline_function(value); + printf("File1: Result is %d\n", result); + return 0; +} diff --git a/bolt/test/Inputs/multi-cu-file2.c b/bolt/test/Inputs/multi-cu-file2.c new file mode 100644 index 0000000000000..f33af72595afe --- /dev/null +++ b/bolt/test/Inputs/multi-cu-file2.c @@ -0,0 +1,8 @@ +#include "multi-cu-common.h" +#include + +void helper_function() { + int value = 10; + int result = common_inline_function(value); + printf("File2: Helper result is %d\n", result); +} diff --git a/bolt/test/Inputs/process-debug-line.sh b/bolt/test/Inputs/process-debug-line.sh new file mode 100755 index 0000000000000..b30408df922eb --- /dev/null +++ b/bolt/test/Inputs/process-debug-line.sh @@ -0,0 +1,101 @@ +#!/bin/sh + +# Script to process llvm-dwarfdump --debug-line output and create a normalized table +# Usage: process-debug-line.sh +# +# Output format: CU_FILE LINE COLUMN FILE_NAME [additional_info] +# This strips addresses to make rows unique and adds context about which CU and file each line belongs to + +if [ $# -ne 1 ]; then + echo "Usage: $0 " >&2 + exit 1 +fi + +debug_line_file="$1" + +if [ ! 
-f "$debug_line_file" ]; then + echo "Error: File '$debug_line_file' not found" >&2 + exit 1 +fi + +awk ' +BEGIN { + cu_count = 0 + current_cu_file = "" + # Initialize file names array + for (i = 0; i < 100; i++) current_file_names[i] = "" +} + +# Track debug_line sections (new CU) +/^debug_line\[/ { + cu_count++ + current_cu_file = "" + # Clear file names array for new CU + for (i = 0; i < 100; i++) current_file_names[i] = "" + next +} + +# Capture file names and their indices +/^file_names\[.*\]:/ { + # Extract file index using more portable regex + if (match($0, /file_names\[[[:space:]]*([0-9]+)\]:/, arr)) { + file_index = arr[1] + } else { + # Fallback parsing + gsub(/file_names\[/, "", $0) + gsub(/\]:.*/, "", $0) + gsub(/[[:space:]]/, "", $0) + file_index = $0 + } + + getline # Read the next line which contains the actual filename + if (match($0, /name:[[:space:]]*"([^"]*)"/, name_arr)) { + filename = name_arr[1] + current_file_names[file_index] = filename + + # Extract basename for main CU file (first .c/.cpp/.cc file we see) + if (current_cu_file == "" && match(filename, /([^\/]*\.(c|cpp|cc))$/, cu_arr)) { + current_cu_file = cu_arr[1] + } + } + next +} + +# Process line table entries +/^0x[0-9a-f]+/ { + # Parse the line entry: Address Line Column File ISA Discriminator OpIndex Flags + if (NF >= 4) { + line = $2 + column = $3 + file_index = $4 + + # Get the filename for this file index + filename = current_file_names[file_index] + if (filename == "") { + filename = "UNKNOWN_FILE_" file_index + } else { + # Extract just the basename using portable method + if (match(filename, /([^\/]*)$/, basename_arr)) { + filename = basename_arr[1] + } else { + # Fallback: use gsub + gsub(/.*\//, "", filename) + } + } + + # Build additional info (flags, etc.) 
+ additional_info = "" + for (i = 8; i <= NF; i++) { + if (additional_info != "") additional_info = additional_info " " + additional_info = additional_info $i + } + + # Output normalized row: CU_FILE LINE COLUMN FILE_NAME [additional_info] + printf "%s %s %s %s", current_cu_file, line, column, filename + if (additional_info != "") { + printf " %s", additional_info + } + printf "\n" + } +} +' "$debug_line_file" diff --git a/bolt/test/X86/multi-cu-debug-line.test b/bolt/test/X86/multi-cu-debug-line.test new file mode 100644 index 0000000000000..430b281445db5 --- /dev/null +++ b/bolt/test/X86/multi-cu-debug-line.test @@ -0,0 +1,108 @@ +## Test that BOLT correctly handles debug line information for functions +## that belong to multiple compilation units (e.g., inline functions in +## common header files). The test covers two scenarios: +## 1. Normal processing: .debug_line section shows lines for the function +## in all CUs where it was compiled, with no duplicate rows within CUs +## 2. Functions not processed: When BOLT doesn't process functions (using +## --funcs with nonexistent function), original debug info is preserved + +# REQUIRES: system-linux + +## Compile test files with debug info +# RUN: %clang %cflags -O0 -g %S/../Inputs/multi-cu-file1.c %S/../Inputs/multi-cu-file2.c \ +# RUN: -I%S/../Inputs -o %t.exe -Wl,-q + +## Test 1: Normal BOLT processing (functions are processed/optimized) +# RUN: llvm-bolt %t.exe -o %t.bolt --update-debug-sections +# RUN: llvm-dwarfdump --debug-line %t.bolt > %t.debug-line.txt +# RUN: FileCheck %s --check-prefix=BASIC --input-file %t.debug-line.txt + +## Check that debug line information is present for both compilation units +# BASIC: debug_line[{{.*}}] +# BASIC: file_names[{{.*}}]: +# BASIC: name: "{{.*}}multi-cu-file1.c" +# BASIC: debug_line[{{.*}}] +# BASIC: file_names[{{.*}}]: +# BASIC: name: "{{.*}}multi-cu-file2.c" + +## Use our helper script to create a normalized table without addresses +# RUN: 
%S/../Inputs/process-debug-line.sh %t.debug-line.txt > %t.normalized-debug-line.txt +# RUN: FileCheck %s --check-prefix=NORMALIZED --input-file %t.normalized-debug-line.txt + +## Check that we have line entries for the inline function (lines 5, 6, 7) from multi-cu-common.h +## in both compilation units +# NORMALIZED: multi-cu-file1.c 5 {{[0-9]+}} multi-cu-common.h +# NORMALIZED: multi-cu-file1.c 6 {{[0-9]+}} multi-cu-common.h +# NORMALIZED: multi-cu-file1.c 7 {{[0-9]+}} multi-cu-common.h +# NORMALIZED: multi-cu-file2.c 5 {{[0-9]+}} multi-cu-common.h +# NORMALIZED: multi-cu-file2.c 6 {{[0-9]+}} multi-cu-common.h +# NORMALIZED: multi-cu-file2.c 7 {{[0-9]+}} multi-cu-common.h + +## Verify that we have line entries for the inline function in multiple CUs +## by checking that the header file appears multiple times in different contexts +# RUN: grep -c "multi-cu-common.h" %t.debug-line.txt > %t.header-count.txt +# RUN: FileCheck %s --check-prefix=MULTI-CU --input-file %t.header-count.txt + +## The header should appear in debug line info for multiple CUs +# MULTI-CU: {{[2-9]|[1-9][0-9]+}} + +## Check that there are no duplicate line table rows within the same CU +## This verifies the fix for the bug where duplicate entries were created +# RUN: sort %t.normalized-debug-line.txt | uniq -c | \ +# RUN: awk '$1 > 1 {print "DUPLICATE_ROW: " $0}' > %t.duplicates.txt +# RUN: FileCheck %s --check-prefix=NO-DUPLICATES --input-file %t.duplicates.txt --allow-empty + +## Should have no duplicate normalized rows (file should be empty) +## Note: Cross-CU duplicates are expected and valid (same function in different CUs) +## but within-CU duplicates would indicate a bug +# NO-DUPLICATES-NOT: DUPLICATE_ROW + +## Test 2: Functions not processed by BOLT (using --funcs with nonexistent function) +## This tests the code path where BOLT preserves original debug info +# RUN: llvm-bolt %t.exe -o %t.not-emitted.bolt --update-debug-sections --funcs=nonexistent_function +# RUN: llvm-dwarfdump 
--debug-line %t.not-emitted.bolt > %t.not-emitted.debug-line.txt +# RUN: FileCheck %s --check-prefix=PRESERVED-BASIC --input-file %t.not-emitted.debug-line.txt + +## Check that debug line information is still present for both compilation units when functions aren't processed +# PRESERVED-BASIC: debug_line[{{.*}}] +# PRESERVED-BASIC: file_names[{{.*}}]: +# PRESERVED-BASIC: name: "{{.*}}multi-cu-file1.c" +# PRESERVED-BASIC: debug_line[{{.*}}] +# PRESERVED-BASIC: file_names[{{.*}}]: +# PRESERVED-BASIC: name: "{{.*}}multi-cu-file2.c" + +## Create normalized output for the not-emitted case +# RUN: %S/../Inputs/process-debug-line.sh %t.not-emitted.debug-line.txt > %t.not-emitted.normalized.txt +# RUN: FileCheck %s --check-prefix=PRESERVED-NORMALIZED --input-file %t.not-emitted.normalized.txt + +## Check that we have line entries for the inline function (lines 5, 6, 7) from multi-cu-common.h +## in both compilation units (preserved from original) +# PRESERVED-NORMALIZED: multi-cu-file1.c 5 {{[0-9]+}} multi-cu-common.h +# PRESERVED-NORMALIZED: multi-cu-file1.c 6 {{[0-9]+}} multi-cu-common.h +# PRESERVED-NORMALIZED: multi-cu-file1.c 7 {{[0-9]+}} multi-cu-common.h +# PRESERVED-NORMALIZED: multi-cu-file2.c 5 {{[0-9]+}} multi-cu-common.h +# PRESERVED-NORMALIZED: multi-cu-file2.c 6 {{[0-9]+}} multi-cu-common.h +# PRESERVED-NORMALIZED: multi-cu-file2.c 7 {{[0-9]+}} multi-cu-common.h + +## Verify that we have line entries for the inline function in multiple CUs (preserved) +## by checking that the header file appears multiple times in different contexts +# RUN: grep -c "multi-cu-common.h" %t.not-emitted.debug-line.txt > %t.preserved-header-count.txt +# RUN: FileCheck %s --check-prefix=PRESERVED-MULTI-CU --input-file %t.preserved-header-count.txt + +## The header should appear in debug line info for multiple CUs (preserved from original) +# PRESERVED-MULTI-CU: {{[2-9]|[1-9][0-9]+}} + +## Check that original debug info is preserved for main functions +# RUN: grep 
"multi-cu-file1.c.*multi-cu-file1.c" %t.not-emitted.normalized.txt > %t.preserved-main.txt +# RUN: FileCheck %s --check-prefix=PRESERVED-MAIN --input-file %t.preserved-main.txt + +# PRESERVED-MAIN: multi-cu-file1.c {{[0-9]+}} {{[0-9]+}} multi-cu-file1.c + +## Check that original debug info is preserved for file2 functions +# RUN: grep "multi-cu-file2.c.*multi-cu-file2.c" %t.not-emitted.normalized.txt > %t.preserved-file2.txt +# RUN: FileCheck %s --check-prefix=PRESERVED-FILE2 --input-file %t.preserved-file2.txt + +# PRESERVED-FILE2: multi-cu-file2.c {{[0-9]+}} {{[0-9]+}} multi-cu-file2.c + +## Note: We do not check for duplicates in Test 2 since we are preserving original debug info as-is +## and the original may contain patterns that would be flagged as duplicates by our normalization \ No newline at end of file diff --git a/bolt/test/perf2bolt/Inputs/perf_test.lds b/bolt/test/perf2bolt/Inputs/perf_test.lds index 66d925a05bebc..c2704d73a638c 100644 --- a/bolt/test/perf2bolt/Inputs/perf_test.lds +++ b/bolt/test/perf2bolt/Inputs/perf_test.lds @@ -1,13 +1,12 @@ SECTIONS { - . = SIZEOF_HEADERS; + . = 0x400000 + SIZEOF_HEADERS; .interp : { *(.interp) } .note.gnu.build-id : { *(.note.gnu.build-id) } - . = 0x212e8; .dynsym : { *(.dynsym) } - . = 0x31860; + . = 0x801000; .text : { *(.text*) } - . = 0x41c20; + . = 0x803000; .fini_array : { *(.fini_array) } - . = 0x54e18; + . 
= 0x805000; .data : { *(.data) } -} \ No newline at end of file +} diff --git a/bolt/unittests/Core/CMakeLists.txt b/bolt/unittests/Core/CMakeLists.txt index 54e8ea10cda12..538add9baa798 100644 --- a/bolt/unittests/Core/CMakeLists.txt +++ b/bolt/unittests/Core/CMakeLists.txt @@ -7,6 +7,7 @@ set(LLVM_LINK_COMPONENTS add_bolt_unittest(CoreTests BinaryContext.cpp + ClusteredRows.cpp MCPlusBuilder.cpp MemoryMaps.cpp DynoStats.cpp diff --git a/bolt/unittests/Core/ClusteredRows.cpp b/bolt/unittests/Core/ClusteredRows.cpp new file mode 100644 index 0000000000000..5901f9ac5aaaa --- /dev/null +++ b/bolt/unittests/Core/ClusteredRows.cpp @@ -0,0 +1,152 @@ +//===- bolt/unittest/Core/ClusteredRows.cpp ------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "bolt/Core/DebugData.h" +#include "llvm/Support/SMLoc.h" +#include "gtest/gtest.h" +#include + +using namespace llvm; +using namespace llvm::bolt; + +namespace { + +class ClusteredRowsTest : public ::testing::Test { +protected: + void SetUp() override { + Container = std::make_unique(); + } + + std::unique_ptr Container; +}; + +TEST_F(ClusteredRowsTest, CreateSingleElement) { + ClusteredRows *CR = Container->createClusteredRows(1); + ASSERT_NE(CR, nullptr); + EXPECT_EQ(CR->size(), 1u); + + // Test population with single element + std::vector TestRefs = { + {42, 100} + }; + CR->populate(TestRefs); + + ArrayRef Rows = CR->getRows(); + EXPECT_EQ(Rows.size(), 1u); + EXPECT_EQ(Rows[0].DwCompileUnitIndex, 42u); + EXPECT_EQ(Rows[0].RowIndex, 100u); +} + +TEST_F(ClusteredRowsTest, CreateMultipleElements) { + ClusteredRows *CR = Container->createClusteredRows(3); + ASSERT_NE(CR, nullptr); + EXPECT_EQ(CR->size(), 3u); + + // Test population with multiple 
elements + std::vector TestRefs = { + {10, 20}, + {30, 40}, + {50, 60} + }; + CR->populate(TestRefs); + + ArrayRef Rows = CR->getRows(); + EXPECT_EQ(Rows.size(), 3u); + + EXPECT_EQ(Rows[0].DwCompileUnitIndex, 10u); + EXPECT_EQ(Rows[0].RowIndex, 20u); + + EXPECT_EQ(Rows[1].DwCompileUnitIndex, 30u); + EXPECT_EQ(Rows[1].RowIndex, 40u); + + EXPECT_EQ(Rows[2].DwCompileUnitIndex, 50u); + EXPECT_EQ(Rows[2].RowIndex, 60u); +} + +TEST_F(ClusteredRowsTest, SMLoc_Conversion) { + ClusteredRows *CR = Container->createClusteredRows(2); + ASSERT_NE(CR, nullptr); + + // Test SMLoc conversion + SMLoc Loc = CR->toSMLoc(); + EXPECT_TRUE(Loc.isValid()); + + // Test round-trip conversion + const ClusteredRows *CR2 = ClusteredRows::fromSMLoc(Loc); + EXPECT_EQ(CR, CR2); + EXPECT_EQ(CR2->size(), 2u); +} + +TEST_F(ClusteredRowsTest, PopulateWithArrayRef) { + ClusteredRows *CR = Container->createClusteredRows(4); + ASSERT_NE(CR, nullptr); + + // Test population with ArrayRef + DebugLineTableRowRef TestArray[] = { + {1, 2}, + {3, 4}, + {5, 6}, + {7, 8} + }; + ArrayRef TestRefs(TestArray, 4); + CR->populate(TestRefs); + + ArrayRef Rows = CR->getRows(); + EXPECT_EQ(Rows.size(), 4u); + + for (size_t i = 0; i < 4; ++i) { + EXPECT_EQ(Rows[i].DwCompileUnitIndex, TestArray[i].DwCompileUnitIndex); + EXPECT_EQ(Rows[i].RowIndex, TestArray[i].RowIndex); + } +} + +TEST_F(ClusteredRowsTest, MultipleClusteredRows) { + // Test creating multiple ClusteredRows objects + ClusteredRows *CR1 = Container->createClusteredRows(2); + ClusteredRows *CR2 = Container->createClusteredRows(3); + ClusteredRows *CR3 = Container->createClusteredRows(1); + + ASSERT_NE(CR1, nullptr); + ASSERT_NE(CR2, nullptr); + ASSERT_NE(CR3, nullptr); + + // Ensure they are different objects + EXPECT_NE(CR1, CR2); + EXPECT_NE(CR2, CR3); + EXPECT_NE(CR1, CR3); + + // Verify sizes + EXPECT_EQ(CR1->size(), 2u); + EXPECT_EQ(CR2->size(), 3u); + EXPECT_EQ(CR3->size(), 1u); + + // Populate each with different data + std::vector TestRefs1 = {{100, 
200}, {300, 400}}; + std::vector TestRefs2 = {{10, 20}, {30, 40}, {50, 60}}; + std::vector TestRefs3 = {{999, 888}}; + + CR1->populate(TestRefs1); + CR2->populate(TestRefs2); + CR3->populate(TestRefs3); + + // Verify data integrity + ArrayRef Rows1 = CR1->getRows(); + ArrayRef Rows2 = CR2->getRows(); + ArrayRef Rows3 = CR3->getRows(); + + EXPECT_EQ(Rows1[0].DwCompileUnitIndex, 100u); + EXPECT_EQ(Rows1[1].RowIndex, 400u); + + EXPECT_EQ(Rows2[1].DwCompileUnitIndex, 30u); + EXPECT_EQ(Rows2[2].RowIndex, 60u); + + EXPECT_EQ(Rows3[0].DwCompileUnitIndex, 999u); + EXPECT_EQ(Rows3[0].RowIndex, 888u); +} + +} // namespace From abcd69590b944e24605c292ae835596115f9284d Mon Sep 17 00:00:00 2001 From: Grigory Pastukhov Date: Tue, 29 Jul 2025 14:04:57 -0700 Subject: [PATCH 3/7] Fix lint issues --- bolt/lib/Core/BinaryContext.cpp | 6 ++---- bolt/unittests/Core/ClusteredRows.cpp | 17 +++-------------- 2 files changed, 5 insertions(+), 18 deletions(-) diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp index 1766b0540a5cd..df151f398bd54 100644 --- a/bolt/lib/Core/BinaryContext.cpp +++ b/bolt/lib/Core/BinaryContext.cpp @@ -1575,14 +1575,12 @@ unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID, StringRef Dir = ""; if (FileNameEntry.DirIdx != 0) { if (std::optional DirName = dwarf::toString( - LineTable->Prologue - .IncludeDirectories[FileNameEntry.DirIdx - 1])) { + LineTable->Prologue.IncludeDirectories[FileNameEntry.DirIdx - 1])) { Dir = *DirName; } } StringRef FileName = ""; - if (std::optional FName = - dwarf::toString(FileNameEntry.Name)) + if (std::optional FName = dwarf::toString(FileNameEntry.Name)) FileName = *FName; assert(FileName != ""); DWARFCompileUnit *DstUnit = DwCtx->getCompileUnitForOffset(DestCUID); diff --git a/bolt/unittests/Core/ClusteredRows.cpp b/bolt/unittests/Core/ClusteredRows.cpp index 5901f9ac5aaaa..a75209a75dfad 100644 --- a/bolt/unittests/Core/ClusteredRows.cpp +++ b/bolt/unittests/Core/ClusteredRows.cpp @@ 
-31,9 +31,7 @@ TEST_F(ClusteredRowsTest, CreateSingleElement) { EXPECT_EQ(CR->size(), 1u); // Test population with single element - std::vector TestRefs = { - {42, 100} - }; + std::vector TestRefs = {{42, 100}}; CR->populate(TestRefs); ArrayRef Rows = CR->getRows(); @@ -48,11 +46,7 @@ TEST_F(ClusteredRowsTest, CreateMultipleElements) { EXPECT_EQ(CR->size(), 3u); // Test population with multiple elements - std::vector TestRefs = { - {10, 20}, - {30, 40}, - {50, 60} - }; + std::vector TestRefs = {{10, 20}, {30, 40}, {50, 60}}; CR->populate(TestRefs); ArrayRef Rows = CR->getRows(); @@ -87,12 +81,7 @@ TEST_F(ClusteredRowsTest, PopulateWithArrayRef) { ASSERT_NE(CR, nullptr); // Test population with ArrayRef - DebugLineTableRowRef TestArray[] = { - {1, 2}, - {3, 4}, - {5, 6}, - {7, 8} - }; + DebugLineTableRowRef TestArray[] = {{1, 2}, {3, 4}, {5, 6}, {7, 8}}; ArrayRef TestRefs(TestArray, 4); CR->populate(TestRefs); From 9391b3a7861ed4495b7ad3308dd7a0d46e1a008f Mon Sep 17 00:00:00 2001 From: Grigory Pastukhov Date: Tue, 29 Jul 2025 16:28:32 -0700 Subject: [PATCH 4/7] Modify awk script to make it work with older versions --- bolt/test/Inputs/process-debug-line.sh | 52 ++++++++++++++------------ 1 file changed, 28 insertions(+), 24 deletions(-) diff --git a/bolt/test/Inputs/process-debug-line.sh b/bolt/test/Inputs/process-debug-line.sh index b30408df922eb..44cbcd1e5984a 100755 --- a/bolt/test/Inputs/process-debug-line.sh +++ b/bolt/test/Inputs/process-debug-line.sh @@ -23,7 +23,9 @@ BEGIN { cu_count = 0 current_cu_file = "" # Initialize file names array - for (i = 0; i < 100; i++) current_file_names[i] = "" + for (i = 0; i < 100; i++) { + current_file_names[i] = "" + } } # Track debug_line sections (new CU) @@ -31,31 +33,34 @@ BEGIN { cu_count++ current_cu_file = "" # Clear file names array for new CU - for (i = 0; i < 100; i++) current_file_names[i] = "" + for (i = 0; i < 100; i++) { + current_file_names[i] = "" + } next } # Capture file names and their indices 
/^file_names\[.*\]:/ { - # Extract file index using more portable regex - if (match($0, /file_names\[[[:space:]]*([0-9]+)\]:/, arr)) { - file_index = arr[1] - } else { - # Fallback parsing - gsub(/file_names\[/, "", $0) - gsub(/\]:.*/, "", $0) - gsub(/[[:space:]]/, "", $0) - file_index = $0 - } + # Extract file index using simple string operations + line_copy = $0 + gsub(/file_names\[/, "", line_copy) + gsub(/\]:.*/, "", line_copy) + gsub(/[ \t]/, "", line_copy) + file_index = line_copy getline # Read the next line which contains the actual filename - if (match($0, /name:[[:space:]]*"([^"]*)"/, name_arr)) { - filename = name_arr[1] + # Extract filename from name: "filename" format + if (match($0, /name:[ \t]*"/)) { + filename = $0 + gsub(/.*name:[ \t]*"/, "", filename) + gsub(/".*/, "", filename) current_file_names[file_index] = filename # Extract basename for main CU file (first .c/.cpp/.cc file we see) - if (current_cu_file == "" && match(filename, /([^\/]*\.(c|cpp|cc))$/, cu_arr)) { - current_cu_file = cu_arr[1] + if (current_cu_file == "" && match(filename, /\.(c|cpp|cc)$/)) { + cu_filename = filename + gsub(/.*\//, "", cu_filename) + current_cu_file = cu_filename } } next @@ -74,19 +79,18 @@ BEGIN { if (filename == "") { filename = "UNKNOWN_FILE_" file_index } else { - # Extract just the basename using portable method - if (match(filename, /([^\/]*)$/, basename_arr)) { - filename = basename_arr[1] - } else { - # Fallback: use gsub - gsub(/.*\//, "", filename) - } + # Extract just the basename + basename = filename + gsub(/.*\//, "", basename) + filename = basename } # Build additional info (flags, etc.) 
additional_info = "" for (i = 8; i <= NF; i++) { - if (additional_info != "") additional_info = additional_info " " + if (additional_info != "") { + additional_info = additional_info " " + } additional_info = additional_info $i } From 41edb2b8c492125e89732b58dc3e7fda333bf015 Mon Sep 17 00:00:00 2001 From: Grigory Pastukhov Date: Tue, 29 Jul 2025 17:22:00 -0700 Subject: [PATCH 5/7] Minor fixes --- bolt/include/bolt/Core/BinaryFunction.h | 2 +- bolt/lib/Core/BinaryContext.cpp | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h index 966559e0c6fa6..ec56ff3e37dd2 100644 --- a/bolt/include/bolt/Core/BinaryFunction.h +++ b/bolt/include/bolt/Core/BinaryFunction.h @@ -2425,7 +2425,7 @@ class BinaryFunction { } /// Return DWARF compile units for this function. - const SmallVector getDWARFUnits() const { + const SmallVector& getDWARFUnits() const { return DwarfUnitVec; } diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp index df151f398bd54..6cbb17bd4e926 100644 --- a/bolt/lib/Core/BinaryContext.cpp +++ b/bolt/lib/Core/BinaryContext.cpp @@ -1718,9 +1718,15 @@ void BinaryContext::preprocessDebugInfo() { // Clear debug info for functions from units that we are not going to process. 
for (auto &KV : BinaryFunctions) { BinaryFunction &BF = KV.second; + // Collect units to remove to avoid iterator invalidation + SmallVector UnitsToRemove; for (auto *Unit : BF.getDWARFUnits()) { if (!ProcessedCUs.count(Unit)) - BF.removeDWARFUnit(Unit); + UnitsToRemove.push_back(Unit); + } + // Remove the collected units + for (auto *Unit : UnitsToRemove) { + BF.removeDWARFUnit(Unit); } } From 44bf8bb419cc79a58a7a0aabf5778fcad81de142 Mon Sep 17 00:00:00 2001 From: Grigory Pastukhov Date: Wed, 30 Jul 2025 19:41:02 -0700 Subject: [PATCH 6/7] Fixed typos --- bolt/include/bolt/Core/BinaryContext.h | 2 +- bolt/include/bolt/Core/DebugData.h | 32 ++++++++------------------ bolt/lib/Core/BinaryFunction.cpp | 2 +- bolt/unittests/Core/ClusteredRows.cpp | 4 ++-- 4 files changed, 13 insertions(+), 27 deletions(-) diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h index 48bc9a5d1f92c..72c8817daa714 100644 --- a/bolt/include/bolt/Core/BinaryContext.h +++ b/bolt/include/bolt/Core/BinaryContext.h @@ -292,7 +292,7 @@ class BinaryContext { /// tables associated with instructions. Since binary functions can span /// multiple compilation units, instructions may reference debug line /// information from multiple CUs. - ClasteredRowsContainer ClasteredRows; + ClusteredRowsContainer ClusteredRows; // Setup MCPlus target builder void initializeTarget(std::unique_ptr TargetBuilder) { diff --git a/bolt/include/bolt/Core/DebugData.h b/bolt/include/bolt/Core/DebugData.h index 048594946d8a9..adbce0bb1d5b6 100644 --- a/bolt/include/bolt/Core/DebugData.h +++ b/bolt/include/bolt/Core/DebugData.h @@ -834,9 +834,6 @@ class DwarfLineTable { }; /// ClusteredRows represents a collection of debug line table row references. -/// Since a Binary function can belong to multiple compilation units (CUs), -/// a single MCInst can have multiple debug line table rows associated with it -/// from different CUs. This class manages such clustered row references. 
/// /// MEMORY LAYOUT AND DESIGN: /// This class uses a flexible array member pattern to store all @@ -846,7 +843,7 @@ class DwarfLineTable { /// +------------------+ /// | ClusteredRows | <- Object header (Size + first element) /// | - Size | -/// | - Raws (element) | <- First DebugLineTableRowRef element +/// | - Rows (element) | <- First DebugLineTableRowRef element /// +------------------+ /// | element[1] | <- Additional DebugLineTableRowRef elements /// | element[2] | stored immediately after the object @@ -854,18 +851,7 @@ class DwarfLineTable { /// | element[Size-1] | /// +------------------+ /// -/// PERFORMANCE BENEFITS: -/// - Single memory allocation: All elements are stored in one contiguous block, -/// eliminating the need for separate heap allocations for the array. -/// - No extra dereferencing: Elements are accessed directly via pointer -/// arithmetic (beginPtr() + offset) rather than through an additional -/// pointer indirection. -/// - Cache locality: All elements are guaranteed to be adjacent in memory, -/// improving cache performance during iteration. -/// - Memory efficiency: No overhead from separate pointer storage or -/// fragmented allocations. -/// -/// The 'Raws' member serves as both the first element storage and the base +/// The 'Rows' member serves as both the first element storage and the base /// address for pointer arithmetic to access subsequent elements. 
class ClusteredRows { public: @@ -891,7 +877,7 @@ class ClusteredRows { private: uint64_t Size; - DebugLineTableRowRef Raws; + DebugLineTableRowRef Rows; ClusteredRows(uint64_t Size) : Size(Size) {} static uint64_t getTotalSize(uint64_t Size) { @@ -899,17 +885,17 @@ class ClusteredRows { return sizeof(ClusteredRows) + (Size - 1) * sizeof(DebugLineTableRowRef); } const DebugLineTableRowRef *beginPtrConst() const { - return reinterpret_cast(&Raws); + return reinterpret_cast(&Rows); } DebugLineTableRowRef *beginPtr() { - return reinterpret_cast(&Raws); + return reinterpret_cast(&Rows); } - friend class ClasteredRowsContainer; + friend class ClusteredRowsContainer; }; -/// ClasteredRowsContainer manages the lifecycle of ClusteredRows objects. -class ClasteredRowsContainer { +/// ClusteredRowsContainer manages the lifecycle of ClusteredRows objects. +class ClusteredRowsContainer { public: ClusteredRows *createClusteredRows(uint64_t Size) { auto *CR = new (std::malloc(ClusteredRows::getTotalSize(Size))) @@ -917,7 +903,7 @@ class ClasteredRowsContainer { Clusters.push_back(CR); return CR; } - ~ClasteredRowsContainer() { + ~ClusteredRowsContainer() { for (auto *CR : Clusters) std::free(CR); } diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp index a3a6b31451441..8635cba006991 100644 --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -1502,7 +1502,7 @@ Error BinaryFunction::disassemble() { } if (!Rows.empty()) { ClusteredRows *Cluster = - BC.ClasteredRows.createClusteredRows(Rows.size()); + BC.ClusteredRows.createClusteredRows(Rows.size()); Cluster->populate(Rows); Instruction.setLoc(Cluster->toSMLoc()); } diff --git a/bolt/unittests/Core/ClusteredRows.cpp b/bolt/unittests/Core/ClusteredRows.cpp index a75209a75dfad..4665022c91fdd 100644 --- a/bolt/unittests/Core/ClusteredRows.cpp +++ b/bolt/unittests/Core/ClusteredRows.cpp @@ -19,10 +19,10 @@ namespace { class ClusteredRowsTest : public ::testing::Test { 
protected: void SetUp() override { - Container = std::make_unique<ClasteredRowsContainer>(); + Container = std::make_unique<ClusteredRowsContainer>(); } - std::unique_ptr<ClasteredRowsContainer> Container; + std::unique_ptr<ClusteredRowsContainer> Container; }; TEST_F(ClusteredRowsTest, CreateSingleElement) { From 04663a0cf062005b1f6abae2d1e0078396e36863 Mon Sep 17 00:00:00 2001 From: Grigory Pastukhov Date: Thu, 31 Jul 2025 10:45:31 -0700 Subject: [PATCH 7/7] Made the test architecture independent --- bolt/test/lit.cfg.py | 4 ++-- bolt/test/{X86 => }/multi-cu-debug-line.test | 14 +++++++------- .../process-debug-line.sh => process-debug-line} | 0 3 files changed, 9 insertions(+), 9 deletions(-) rename bolt/test/{X86 => }/multi-cu-debug-line.test (93%) rename bolt/test/{Inputs/process-debug-line.sh => process-debug-line} (100%) diff --git a/bolt/test/lit.cfg.py b/bolt/test/lit.cfg.py index 0d05229be2bf3..508db8b890190 100644 --- a/bolt/test/lit.cfg.py +++ b/bolt/test/lit.cfg.py @@ -10,8 +10,7 @@ import lit.util from lit.llvm import llvm_config -from lit.llvm.subst import ToolSubst -from lit.llvm.subst import FindTool +from lit.llvm.subst import FindTool, ToolSubst # Configuration file for the 'lit' test runner. @@ -127,6 +126,7 @@ unresolved="fatal", extra_args=[link_fdata_cmd], ), + ToolSubst("process-debug-line", unresolved="fatal"), ToolSubst("merge-fdata", unresolved="fatal"), ToolSubst("llvm-readobj", unresolved="fatal"), ToolSubst("llvm-dwp", unresolved="fatal"), diff --git a/bolt/test/X86/multi-cu-debug-line.test b/bolt/test/multi-cu-debug-line.test similarity index 93% rename from bolt/test/X86/multi-cu-debug-line.test rename to bolt/test/multi-cu-debug-line.test index 430b281445db5..a94c901bbcc5a 100644 --- a/bolt/test/X86/multi-cu-debug-line.test +++ b/bolt/test/multi-cu-debug-line.test @@ -1,16 +1,16 @@ ## Test that BOLT correctly handles debug line information for functions ## that belong to multiple compilation units (e.g., inline functions in ## common header files). The test covers two scenarios: -## 1. 
Normal processing: .debug_line section shows lines for the function +## 1. Normal processing: .debug_line section shows lines for the function ## in all CUs where it was compiled, with no duplicate rows within CUs -## 2. Functions not processed: When BOLT doesn't process functions (using +## 2. Functions not processed: When BOLT doesn't process functions (using ## --funcs with nonexistent function), original debug info is preserved # REQUIRES: system-linux ## Compile test files with debug info -# RUN: %clang %cflags -O0 -g %S/../Inputs/multi-cu-file1.c %S/../Inputs/multi-cu-file2.c \ -# RUN: -I%S/../Inputs -o %t.exe -Wl,-q +# RUN: %clang %cflags -O0 -g %S/Inputs/multi-cu-file1.c %S/Inputs/multi-cu-file2.c \ +# RUN: -I%S/Inputs -o %t.exe -Wl,-q ## Test 1: Normal BOLT processing (functions are processed/optimized) # RUN: llvm-bolt %t.exe -o %t.bolt --update-debug-sections @@ -26,7 +26,7 @@ # BASIC: name: "{{.*}}multi-cu-file2.c" ## Use our helper script to create a normalized table without addresses -# RUN: %S/../Inputs/process-debug-line.sh %t.debug-line.txt > %t.normalized-debug-line.txt +# RUN: process-debug-line %t.debug-line.txt > %t.normalized-debug-line.txt # RUN: FileCheck %s --check-prefix=NORMALIZED --input-file %t.normalized-debug-line.txt ## Check that we have line entries for the inline function (lines 5, 6, 7) from multi-cu-common.h @@ -72,7 +72,7 @@ # PRESERVED-BASIC: name: "{{.*}}multi-cu-file2.c" ## Create normalized output for the not-emitted case -# RUN: %S/../Inputs/process-debug-line.sh %t.not-emitted.debug-line.txt > %t.not-emitted.normalized.txt +# RUN: process-debug-line %t.not-emitted.debug-line.txt > %t.not-emitted.normalized.txt # RUN: FileCheck %s --check-prefix=PRESERVED-NORMALIZED --input-file %t.not-emitted.normalized.txt ## Check that we have line entries for the inline function (lines 5, 6, 7) from multi-cu-common.h @@ -105,4 +105,4 @@ # PRESERVED-FILE2: multi-cu-file2.c {{[0-9]+}} {{[0-9]+}} multi-cu-file2.c ## Note: We do not check 
for duplicates in Test 2 since we are preserving original debug info as-is -## and the original may contain patterns that would be flagged as duplicates by our normalization \ No newline at end of file +## and the original may contain patterns that would be flagged as duplicates by our normalization diff --git a/bolt/test/Inputs/process-debug-line.sh b/bolt/test/process-debug-line similarity index 100% rename from bolt/test/Inputs/process-debug-line.sh rename to bolt/test/process-debug-line