[CIR] Add bit ffs operation (#150997)

Lancern · web-flow · commit 1249ab9a0364 · 2025-07-29T20:45:13.000+08:00
This patch adds the `cir.ffs` operation which corresponds to the
`__builtin_ffs` family of builtin functions.

This operation was not included in the previous PRs because, at that time,
a call to `__builtin_ffs` was transformed into a library call to `ffs`.
That behavior has since changed, so calls to `__builtin_ffs` can now be
lowered properly to `cir.ffs`.
diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td
@@ -2911,6 +2911,28 @@ def CIR_BitCtzOp : CIR_BitZeroCountOpBase<"ctz",
   }];
 }
 
+def CIR_BitFfsOp : CIR_BitOpBase<"ffs", CIR_SIntOfWidths<[32, 64]>> {
+  let summary = "Get the position of the least significant 1-bit in input";
+  let description = [{
+    Compute the 1-based position of the least significant 1-bit of the input.
+
+    The input integer must be a signed integer. The `cir.ffs` operation returns
+    one plus the index of the least significant 1-bit of the input signed
+    integer. If the input integer is 0, `cir.ffs` yields 0.
+
+    Example:
+
+    ```mlir
+    !s32i = !cir.int<s, 32>
+
+    // %0 = 0b0010_1000
+    %0 = cir.const #cir.int<40> : !s32i
+    // %1 will be 4 since the lowest set bit is the 4th least significant bit.
+    %1 = cir.ffs %0 : !s32i
+    ```
+  }];
+}
+
 def CIR_BitParityOp : CIR_BitOpBase<"parity", CIR_UIntOfWidths<[32, 64]>> {
   let summary = "Get the parity of input";
   let description = [{
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
@@ -190,6 +190,11 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID,
     assert(!cir::MissingFeatures::builtinCheckKind());
     return emitBuiltinBitOp<cir::BitClzOp>(*this, e, /*poisonZero=*/true);
 
+  case Builtin::BI__builtin_ffs:
+  case Builtin::BI__builtin_ffsl:
+  case Builtin::BI__builtin_ffsll:
+    return emitBuiltinBitOp<cir::BitFfsOp>(*this, e);
+
   case Builtin::BI__builtin_parity:
   case Builtin::BI__builtin_parityl:
   case Builtin::BI__builtin_parityll:
diff --git a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp
@@ -2295,6 +2295,15 @@ OpFoldResult BitCtzOp::fold(FoldAdaptor adaptor) {
       getPoisonZero());
 }
 
+OpFoldResult BitFfsOp::fold(FoldAdaptor adaptor) {
+  // ffs folds to 0 for a zero input, else 1 + the index of the lowest set bit.
+  return foldUnaryBitOp(adaptor.getInput(), [](const llvm::APInt &value) {
+    const unsigned width = value.getBitWidth();
+    const unsigned lowZeros = value.countTrailingZeros();
+    return llvm::APInt(width, lowZeros == width ? 0 : lowZeros + 1);
+  });
+}
+
 OpFoldResult BitParityOp::fold(FoldAdaptor adaptor) {
   return foldUnaryBitOp(adaptor.getInput(), [](const llvm::APInt &inputValue) {
     return llvm::APInt(inputValue.getBitWidth(), inputValue.popcount() % 2);
diff --git a/clang/lib/CIR/Dialect/Transforms/CIRCanonicalize.cpp b/clang/lib/CIR/Dialect/Transforms/CIRCanonicalize.cpp
@@ -143,7 +143,7 @@ void CIRCanonicalizePass::runOnOperation() {
     if (isa<BrOp, BrCondOp, CastOp, ScopeOp, SwitchOp, SelectOp, UnaryOp,
             ComplexCreateOp, ComplexImagOp, ComplexRealOp, VecCmpOp,
             VecCreateOp, VecExtractOp, VecShuffleOp, VecShuffleDynamicOp,
-            VecTernaryOp, BitClrsbOp, BitClzOp, BitCtzOp, BitParityOp,
+            VecTernaryOp, BitClrsbOp, BitClzOp, BitCtzOp, BitFfsOp, BitParityOp,
             BitPopcountOp, BitReverseOp, ByteSwapOp, RotateOp>(op))
       ops.push_back(op);
   });
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -521,6 +521,32 @@ mlir::LogicalResult CIRToLLVMBitCtzOpLowering::matchAndRewrite(
   return mlir::LogicalResult::success();
 }
 
+mlir::LogicalResult CIRToLLVMBitFfsOpLowering::matchAndRewrite(
+    cir::BitFfsOp op, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  // ffs(x) == (x == 0 ? 0 : cttz(x) + 1); cttz is built with is_zero_poison.
+  mlir::Location loc = op.getLoc();
+  mlir::Value input = adaptor.getInput();
+  mlir::Type resultTy = getTypeConverter()->convertType(op.getType());
+
+  auto tzCount = rewriter.create<mlir::LLVM::CountTrailingZerosOp>(
+      loc, resultTy, input, /*is_zero_poison=*/true);
+  auto oneCst = rewriter.create<mlir::LLVM::ConstantOp>(loc, resultTy, 1);
+  auto position = rewriter.create<mlir::LLVM::AddOp>(loc, tzCount, oneCst);
+
+  auto inputZero =
+      rewriter.create<mlir::LLVM::ConstantOp>(loc, input.getType(), 0);
+  auto eqPredicate = mlir::LLVM::ICmpPredicateAttr::get(
+      rewriter.getContext(), mlir::LLVM::ICmpPredicate::eq);
+  auto inputIsZero =
+      rewriter.create<mlir::LLVM::ICmpOp>(loc, eqPredicate, input, inputZero);
+  auto zeroCst = rewriter.create<mlir::LLVM::ConstantOp>(loc, resultTy, 0);
+  auto result = rewriter.create<mlir::LLVM::SelectOp>(loc, inputIsZero,
+                                                      zeroCst, position);
+  rewriter.replaceOp(op, result);
+  return mlir::LogicalResult::success();
+}
+
 mlir::LogicalResult CIRToLLVMBitParityOpLowering::matchAndRewrite(
     cir::BitParityOp op, OpAdaptor adaptor,
     mlir::ConversionPatternRewriter &rewriter) const {
@@ -2089,6 +2115,7 @@ void ConvertCIRToLLVMPass::runOnOperation() {
                CIRToLLVMBitClrsbOpLowering,
                CIRToLLVMBitClzOpLowering,
                CIRToLLVMBitCtzOpLowering,
+               CIRToLLVMBitFfsOpLowering,
                CIRToLLVMBitParityOpLowering,
                CIRToLLVMBitPopcountOpLowering,
                CIRToLLVMBitReverseOpLowering,
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h
@@ -84,6 +84,16 @@ class CIRToLLVMBitCtzOpLowering
                   mlir::ConversionPatternRewriter &) const override;
 };
 
+class CIRToLLVMBitFfsOpLowering
+    : public mlir::OpConversionPattern<cir::BitFfsOp> {
+public:
+  using mlir::OpConversionPattern<cir::BitFfsOp>::OpConversionPattern;
+  // Rewrites a cir.ffs operation into LLVM dialect operations.
+  mlir::LogicalResult
+  matchAndRewrite(cir::BitFfsOp op, OpAdaptor,
+                  mlir::ConversionPatternRewriter &) const override;
+};
+
 class CIRToLLVMBitParityOpLowering
     : public mlir::OpConversionPattern<cir::BitParityOp> {
 public:
diff --git a/clang/test/CIR/CodeGen/builtin_bit.cpp b/clang/test/CIR/CodeGen/builtin_bit.cpp
@@ -216,6 +216,78 @@ int test_builtin_clzg(unsigned x) {
 // OGCG-LABEL: _Z17test_builtin_clzgj
 // OGCG:         %{{.+}} = call i32 @llvm.ctlz.i32(i32 %{{.+}}, i1 true)
 
+int test_builtin_ffs(int x) {
+  return __builtin_ffs(x); // int operand; checks below expect !s32i / i32
+}
+
+// CIR-LABEL: _Z16test_builtin_ffsi
+// CIR:         %{{.+}} = cir.ffs %{{.+}} : !s32i
+// CIR:       }
+
+// LLVM-LABEL: _Z16test_builtin_ffsi
+// LLVM:         %[[INPUT:.+]] = load i32, ptr %{{.+}}, align 4
+// LLVM-NEXT:    %[[CTZ:.+]] = call i32 @llvm.cttz.i32(i32 %[[INPUT]], i1 true)
+// LLVM-NEXT:    %[[R1:.+]] = add i32 %[[CTZ]], 1
+// LLVM-NEXT:    %[[IS_ZERO:.+]] = icmp eq i32 %[[INPUT]], 0
+// LLVM-NEXT:    %{{.+}} = select i1 %[[IS_ZERO]], i32 0, i32 %[[R1]]
+// LLVM:       }
+
+// OGCG-LABEL: _Z16test_builtin_ffsi
+// OGCG:         %[[INPUT:.+]] = load i32, ptr %{{.+}}, align 4
+// OGCG-NEXT:    %[[CTZ:.+]] = call i32 @llvm.cttz.i32(i32 %[[INPUT]], i1 true)
+// OGCG-NEXT:    %[[R1:.+]] = add i32 %[[CTZ]], 1
+// OGCG-NEXT:    %[[IS_ZERO:.+]] = icmp eq i32 %[[INPUT]], 0
+// OGCG-NEXT:    %{{.+}} = select i1 %[[IS_ZERO]], i32 0, i32 %[[R1]]
+// OGCG:       }
+
+int test_builtin_ffsl(long x) {
+  return __builtin_ffsl(x); // long operand; checks below expect !s64i / i64
+}
+
+// CIR-LABEL: _Z17test_builtin_ffsll
+// CIR:         %{{.+}} = cir.ffs %{{.+}} : !s64i
+// CIR:       }
+
+// LLVM-LABEL: _Z17test_builtin_ffsll
+// LLVM:         %[[INPUT:.+]] = load i64, ptr %{{.+}}, align 8
+// LLVM-NEXT:    %[[CTZ:.+]] = call i64 @llvm.cttz.i64(i64 %[[INPUT]], i1 true)
+// LLVM-NEXT:    %[[R1:.+]] = add i64 %[[CTZ]], 1
+// LLVM-NEXT:    %[[IS_ZERO:.+]] = icmp eq i64 %[[INPUT]], 0
+// LLVM-NEXT:    %{{.+}} = select i1 %[[IS_ZERO]], i64 0, i64 %[[R1]]
+// LLVM:       }
+
+// OGCG-LABEL: _Z17test_builtin_ffsll
+// OGCG:         %[[INPUT:.+]] = load i64, ptr %{{.+}}, align 8
+// OGCG-NEXT:    %[[CTZ:.+]] = call i64 @llvm.cttz.i64(i64 %[[INPUT]], i1 true)
+// OGCG-NEXT:    %[[R1:.+]] = add i64 %[[CTZ]], 1
+// OGCG-NEXT:    %[[IS_ZERO:.+]] = icmp eq i64 %[[INPUT]], 0
+// OGCG-NEXT:    %{{.+}} = select i1 %[[IS_ZERO]], i64 0, i64 %[[R1]]
+// OGCG:       }
+
+int test_builtin_ffsll(long long x) {
+  return __builtin_ffsll(x); // long long operand; checks expect !s64i / i64
+}
+
+// CIR-LABEL: _Z18test_builtin_ffsllx
+// CIR:         %{{.+}} = cir.ffs %{{.+}} : !s64i
+// CIR:       }
+
+// LLVM-LABEL: _Z18test_builtin_ffsllx
+// LLVM:         %[[INPUT:.+]] = load i64, ptr %{{.+}}, align 8
+// LLVM-NEXT:    %[[CTZ:.+]] = call i64 @llvm.cttz.i64(i64 %[[INPUT]], i1 true)
+// LLVM-NEXT:    %[[R1:.+]] = add i64 %[[CTZ]], 1
+// LLVM-NEXT:    %[[IS_ZERO:.+]] = icmp eq i64 %[[INPUT]], 0
+// LLVM-NEXT:    %{{.+}} = select i1 %[[IS_ZERO]], i64 0, i64 %[[R1]]
+// LLVM:       }
+
+// OGCG-LABEL: _Z18test_builtin_ffsllx
+// OGCG:         %[[INPUT:.+]] = load i64, ptr %{{.+}}, align 8
+// OGCG-NEXT:    %[[CTZ:.+]] = call i64 @llvm.cttz.i64(i64 %[[INPUT]], i1 true)
+// OGCG-NEXT:    %[[R1:.+]] = add i64 %[[CTZ]], 1
+// OGCG-NEXT:    %[[IS_ZERO:.+]] = icmp eq i64 %[[INPUT]], 0
+// OGCG-NEXT:    %{{.+}} = select i1 %[[IS_ZERO]], i64 0, i64 %[[R1]]
+// OGCG:       }
+
 int test_builtin_parity(unsigned x) {
   return __builtin_parity(x);
 }
diff --git a/clang/test/CIR/Transforms/bit.cir b/clang/test/CIR/Transforms/bit.cir
@@ -75,6 +75,27 @@ module {
   // CHECK-NEXT:    cir.return %[[R]] : !u32i
   // CHECK-NEXT:  }
 
+  cir.func @fold_ffs() -> !s32i {
+    // 40 is 0b0010_1000: the lowest set bit is bit 3, so ffs folds to 4.
+    %0 = cir.const #cir.int<40> : !s32i
+    %1 = cir.ffs %0 : !s32i
+    cir.return %1 : !s32i
+  }
+  // CHECK-LABEL: @fold_ffs
+  // CHECK-NEXT:    %[[R:.+]] = cir.const #cir.int<4> : !s32i
+  // CHECK-NEXT:    cir.return %[[R]] : !s32i
+  // CHECK-NEXT:  }
+
+  cir.func @fold_ffs_zero() -> !s32i {
+    %0 = cir.const #cir.int<0> : !s32i
+    %1 = cir.ffs %0 : !s32i // zero input: expected to fold to 0
+    cir.return %1 : !s32i
+  }
+  // CHECK-LABEL: @fold_ffs_zero
+  // CHECK-NEXT:    %[[R:.+]] = cir.const #cir.int<0> : !s32i
+  // CHECK-NEXT:    cir.return %[[R]] : !s32i
+  // CHECK-NEXT:  }
+
   cir.func @fold_parity() -> !u32i {
     // 0xdeadbeef is 0b1101_1110_1010_1101_1011_1110_1110_1111
     // 0xdeadbeef contains 24 ones