-
Notifications
You must be signed in to change notification settings - Fork 14.7k
[AVR] Fix Avr indvar detection and strength reduction (missed optimization) #152028
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-clang Author: Tom Vijlbrief (tomtor) ChangesFix #151080 Full diff: https://github.com/llvm/llvm-project/pull/152028.diff 7 Files Affected:
diff --git a/clang/lib/Basic/Targets/AVR.h b/clang/lib/Basic/Targets/AVR.h
index 75c969fd59dc9..11aa844f2ce55 100644
--- a/clang/lib/Basic/Targets/AVR.h
+++ b/clang/lib/Basic/Targets/AVR.h
@@ -57,7 +57,8 @@ class LLVM_LIBRARY_VISIBILITY AVRTargetInfo : public TargetInfo {
Int16Type = SignedInt;
Char32Type = UnsignedLong;
SigAtomicType = SignedChar;
- resetDataLayout("e-P1-p:16:8-i8:8-i16:8-i32:8-i64:8-f32:8-f64:8-n8-a:8");
+ resetDataLayout(
+ "e-P1-p:16:8-i8:8-i16:8-i32:8-i64:8-f32:8-f64:8-n8-n16:8-a:8");
}
void getTargetDefines(const LangOptions &Opts,
diff --git a/llvm/lib/Target/AVR/AVRTargetMachine.cpp b/llvm/lib/Target/AVR/AVRTargetMachine.cpp
index b75417a0896a5..8d05005e287f4 100644
--- a/llvm/lib/Target/AVR/AVRTargetMachine.cpp
+++ b/llvm/lib/Target/AVR/AVRTargetMachine.cpp
@@ -19,6 +19,7 @@
#include "AVR.h"
#include "AVRMachineFunctionInfo.h"
+#include "AVRTargetTransformInfo.h"
#include "AVRTargetObjectFile.h"
#include "MCTargetDesc/AVRMCTargetDesc.h"
#include "TargetInfo/AVRTargetInfo.h"
@@ -28,7 +29,7 @@
namespace llvm {
static const char *AVRDataLayout =
- "e-P1-p:16:8-i8:8-i16:8-i32:8-i64:8-f32:8-f64:8-n8-a:8";
+ "e-P1-p:16:8-i8:8-i16:8-i32:8-i64:8-f32:8-f64:8-n8-n16:8-a:8";
/// Processes a CPU name.
static StringRef getCPU(StringRef CPU) {
@@ -62,7 +63,9 @@ namespace {
class AVRPassConfig : public TargetPassConfig {
public:
AVRPassConfig(AVRTargetMachine &TM, PassManagerBase &PM)
- : TargetPassConfig(TM, PM) {}
+ : TargetPassConfig(TM, PM) {
+ EnableLoopTermFold = true;
+ }
AVRTargetMachine &getAVRTargetMachine() const {
return getTM<AVRTargetMachine>();
@@ -107,6 +110,10 @@ const AVRSubtarget *AVRTargetMachine::getSubtargetImpl(const Function &) const {
return &SubTarget;
}
+TargetTransformInfo AVRTargetMachine::getTargetTransformInfo(const Function &F) const {
+ return TargetTransformInfo(std::make_unique<AVRTTIImpl>(this, F));
+}
+
MachineFunctionInfo *AVRTargetMachine::createMachineFunctionInfo(
BumpPtrAllocator &Allocator, const Function &F,
const TargetSubtargetInfo *STI) const {
diff --git a/llvm/lib/Target/AVR/AVRTargetMachine.h b/llvm/lib/Target/AVR/AVRTargetMachine.h
index 167d0076e9581..9452b3d8cd8a5 100644
--- a/llvm/lib/Target/AVR/AVRTargetMachine.h
+++ b/llvm/lib/Target/AVR/AVRTargetMachine.h
@@ -48,6 +48,8 @@ class AVRTargetMachine : public CodeGenTargetMachineImpl {
createMachineFunctionInfo(BumpPtrAllocator &Allocator, const Function &F,
const TargetSubtargetInfo *STI) const override;
+ TargetTransformInfo getTargetTransformInfo(const Function &F) const override;
+
bool isNoopAddrSpaceCast(unsigned SrcAs, unsigned DestAs) const override {
// While AVR has different address spaces, they are all represented by
// 16-bit pointers that can be freely casted between (of course, a pointer
diff --git a/llvm/lib/Target/AVR/AVRTargetTransformInfo.h b/llvm/lib/Target/AVR/AVRTargetTransformInfo.h
new file mode 100644
index 0000000000000..b877918a18151
--- /dev/null
+++ b/llvm/lib/Target/AVR/AVRTargetTransformInfo.h
@@ -0,0 +1,68 @@
+//===- AVRTargetTransformInfo.h - AVR specific TTI ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file defines a TargetTransformInfoImplBase conforming object specific
+/// to the AVR target machine. It uses the target's detailed information to
+/// provide more precise answers to certain TTI queries, while letting the
+/// target independent and default TTI implementations handle the rest.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AVR_AVRTARGETTRANSFORMINFO_H
+#define LLVM_LIB_TARGET_AVR_AVRTARGETTRANSFORMINFO_H
+
+#include "AVRSubtarget.h"
+#include "AVRTargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
+#include "llvm/IR/Function.h"
+#include <optional>
+
+namespace llvm {
+
+class AVRTTIImpl final : public BasicTTIImplBase<AVRTTIImpl> {
+ using BaseT = BasicTTIImplBase<AVRTTIImpl>;
+ using TTI = TargetTransformInfo;
+
+ friend BaseT;
+
+ const AVRSubtarget *ST;
+ const AVRTargetLowering *TLI;
+ const Function *currentF;
+
+ const AVRSubtarget *getST() const { return ST; }
+ const AVRTargetLowering *getTLI() const { return TLI; }
+
+public:
+ explicit AVRTTIImpl(const AVRTargetMachine *TM, const Function &F)
+ : BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl(F)),
+ TLI(ST->getTargetLowering()), currentF(&F) {}
+
+ bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
+ const TargetTransformInfo::LSRCost &C2) const override {
+ // Detect %incdec.ptr because loop-reduce loses them
+ for (const BasicBlock &BB : *currentF) {
+ if (BB.getName().find("while.body") != std::string::npos) {
+ for (const Instruction &I : BB) {
+ std::string str;
+ llvm::raw_string_ostream(str) << I;
+ if (str.find("%incdec.ptr") != std::string::npos)
+ return false;
+ }
+ }
+ }
+ if (C2.Insns == ~0u)
+ return true;
+ return 2 * C1.Insns + C1.AddRecCost + C1.SetupCost + C1.NumRegs <
+ 2 * C2.Insns + C2.AddRecCost + C2.SetupCost + C2.NumRegs;
+ }
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AVR_AVRTARGETTRANSFORMINFO_H
diff --git a/llvm/test/CodeGen/AVR/bug-143247.ll b/llvm/test/CodeGen/AVR/bug-143247.ll
index 07c4c6562c950..d4493272af76d 100644
--- a/llvm/test/CodeGen/AVR/bug-143247.ll
+++ b/llvm/test/CodeGen/AVR/bug-143247.ll
@@ -8,18 +8,18 @@ define void @complex_sbi() {
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: push r16
; CHECK-NEXT: push r17
-; CHECK-NEXT: ldi r24, 0
+; CHECK-NEXT: ldi r24, 1
; CHECK-NEXT: ldi r25, 0
; CHECK-NEXT: .LBB0_1: ; %while.cond
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: sbi 1, 7
-; CHECK-NEXT: adiw r24, 1
; CHECK-NEXT: movw r16, r24
; CHECK-NEXT: andi r24, 15
; CHECK-NEXT: andi r25, 0
; CHECK-NEXT: adiw r24, 1
; CHECK-NEXT: call nil
; CHECK-NEXT: movw r24, r16
+; CHECK-NEXT: adiw r24, 1
; CHECK-NEXT: rjmp .LBB0_1
entry:
br label %while.cond
diff --git a/llvm/test/CodeGen/AVR/load.ll b/llvm/test/CodeGen/AVR/load.ll
index 5de6b48652914..e2e7844b49c02 100644
--- a/llvm/test/CodeGen/AVR/load.ll
+++ b/llvm/test/CodeGen/AVR/load.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mattr=avr6,sram < %s -mtriple=avr -verify-machineinstrs | FileCheck %s
+; RUN: llc -mattr=avr6,sram < %s -mtriple=avr-none -verify-machineinstrs | FileCheck %s
define i8 @load8(ptr %x) {
; CHECK-LABEL: load8:
@@ -76,26 +76,26 @@ while.end: ; preds = %while.body, %entry
ret i8 %r.0.lcssa
}
-define i16 @load16postinc(ptr %x, i16 %y) {
+define i16 @load16postinc(ptr %p, i16 %cnt) {
; CHECK-LABEL: load16postinc:
; CHECK: ld {{.*}}, {{[XYZ]}}+
; CHECK: ld {{.*}}, {{[XYZ]}}+
entry:
- %tobool2 = icmp eq i16 %y, 0
- br i1 %tobool2, label %while.end, label %while.body
-while.body: ; preds = %entry, %while.body
- %r.05 = phi i16 [ %add, %while.body ], [ 0, %entry ]
- %y.addr.04 = phi i16 [ %dec, %while.body ], [ %y, %entry ]
- %x.addr.03 = phi ptr [ %incdec.ptr, %while.body ], [ %x, %entry ]
- %dec = add nsw i16 %y.addr.04, -1
- %incdec.ptr = getelementptr inbounds i16, ptr %x.addr.03, i16 1
- %0 = load i16, ptr %x.addr.03
- %add = add nsw i16 %0, %r.05
- %tobool = icmp eq i16 %dec, 0
- br i1 %tobool, label %while.end, label %while.body
-while.end: ; preds = %while.body, %entry
- %r.0.lcssa = phi i16 [ 0, %entry ], [ %add, %while.body ]
- ret i16 %r.0.lcssa
+ %cmp3 = icmp sgt i16 %cnt, 0
+ br i1 %cmp3, label %for.body, label %for.cond.cleanup
+for.cond.cleanup: ; preds = %for.body, %entry
+ %sum.0.lcssa = phi i16 [ 0, %entry ], [ %add, %for.body ]
+ ret i16 %sum.0.lcssa
+for.body: ; preds = %entry, %for.body
+ %i.06 = phi i16 [ %inc, %for.body ], [ 0, %entry ]
+ %sum.05 = phi i16 [ %add, %for.body ], [ 0, %entry ]
+ %p.addr.04 = phi ptr [ %incdec.ptr, %for.body ], [ %p, %entry ]
+ %incdec.ptr = getelementptr inbounds nuw i8, ptr %p.addr.04, i16 2
+ %0 = load i16, ptr %p.addr.04, align 1
+ %add = add nsw i16 %0, %sum.05
+ %inc = add nuw nsw i16 %i.06, 1
+ %exitcond.not = icmp eq i16 %inc, %cnt
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
}
define i8 @load8predec(ptr %x, i8 %y) {
diff --git a/llvm/test/CodeGen/AVR/shift.ll b/llvm/test/CodeGen/AVR/shift.ll
index 9836f93527b3c..1bd9b45999d7b 100644
--- a/llvm/test/CodeGen/AVR/shift.ll
+++ b/llvm/test/CodeGen/AVR/shift.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=avr -mtriple=avr -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mtriple=avr-none -verify-machineinstrs | FileCheck %s
; Optimize for speed.
define i8 @shift_i8_i8_speed(i8 %a, i8 %b) {
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
loop-reduce does not preserve predecrement operations in loops which it cannot optimize. In functions which have an I see no easy way to do this on individual loops. @Patryk27 suggestions? |
; CHECK-LABEL: load16postinc: | ||
; CHECK: ld {{.*}}, {{[XYZ]}}+ | ||
; CHECK: ld {{.*}}, {{[XYZ]}}+ | ||
entry: | ||
%tobool2 = icmp eq i16 %y, 0 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It would be better to keep this old test, while adding your test as a new case..
@@ -57,7 +57,8 @@ class LLVM_LIBRARY_VISIBILITY AVRTargetInfo : public TargetInfo { | |||
Int16Type = SignedInt; | |||
Char32Type = UnsignedLong; | |||
SigAtomicType = SignedChar; | |||
resetDataLayout("e-P1-p:16:8-i8:8-i16:8-i32:8-i64:8-f32:8-f64:8-n8-a:8"); | |||
resetDataLayout( | |||
"e-P1-p:16:8-i8:8-i16:8-i32:8-i64:8-f32:8-f64:8-n8-n16:8-a:8"); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If this change has partial improvement on #151080, you can submit this without the TTI part.
Sub optimal results are also accepted.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sub optimal results are also accepted. We should not introduce other issues for an extreme optimization.
const TargetTransformInfo::LSRCost &C2) const override { | ||
// Detect %incdec.ptr because loop-reduce loses them | ||
for (const BasicBlock &BB : *currentF) { | ||
if (BB.getName().find("while.body") != std::string::npos) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We should not rely on names to find loop body, what if other compilers like flang-frontend and rustc do not use these names?
Fix #151080