Skip to content

Commit 56944e6

Browse files
authored
[msan] Approximately handle AVX Galois Field Affine Transformation (#150794)
e.g., <16 x i8> @llvm.x86.vgf2p8affineqb.128(<16 x i8>, <16 x i8>, i8) <32 x i8> @llvm.x86.vgf2p8affineqb.256(<32 x i8>, <32 x i8>, i8) <64 x i8> @llvm.x86.vgf2p8affineqb.512(<64 x i8>, <64 x i8>, i8) Out A x b where A and x are packed matrices, b is a vector, Out = A * x + b in GF(2) Multiplication in GF(2) is equivalent to bitwise AND. However, the matrix computation also includes a parity calculation. For the bitwise AND of bits V1 and V2, the exact shadow is: Out_Shadow = (V1_Shadow & V2_Shadow) | (V1 & V2_Shadow) | (V1_Shadow & V2) We approximate the shadow of gf2p8affine using: Out_Shadow = _mm512_gf2p8affine_epi64_epi8(x_Shadow, A_shadow, 0) | _mm512_gf2p8affine_epi64_epi8(x, A_shadow, 0) | _mm512_gf2p8affine_epi64_epi8(x_Shadow, A, 0) | _mm512_set1_epi8(b_Shadow) This approximation has false negatives: if an intermediate dot-product contains an even number of 1's, the parity is 0. It has no false positives. Updates the test from #149258
1 parent 254bfe2 commit 56944e6

File tree

2 files changed

+155
-111
lines changed

2 files changed

+155
-111
lines changed

llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4769,6 +4769,79 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
47694769
setOriginForNaryOp(I);
47704770
}
47714771

4772+
// Approximately handle AVX Galois Field Affine Transformation
4773+
//
4774+
// e.g.,
4775+
// <16 x i8> @llvm.x86.vgf2p8affineqb.128(<16 x i8>, <16 x i8>, i8)
4776+
// <32 x i8> @llvm.x86.vgf2p8affineqb.256(<32 x i8>, <32 x i8>, i8)
4777+
// <64 x i8> @llvm.x86.vgf2p8affineqb.512(<64 x i8>, <64 x i8>, i8)
4778+
// Out A x b
4779+
// where A and x are packed matrices, b is a vector,
4780+
// Out = A * x + b in GF(2)
4781+
//
4782+
// Multiplication in GF(2) is equivalent to bitwise AND. However, the matrix
4783+
// computation also includes a parity calculation.
4784+
//
4785+
// For the bitwise AND of bits V1 and V2, the exact shadow is:
4786+
// Out_Shadow = (V1_Shadow & V2_Shadow)
4787+
// | (V1 & V2_Shadow)
4788+
// | (V1_Shadow & V2 )
4789+
//
4790+
// We approximate the shadow of gf2p8affineqb using:
4791+
// Out_Shadow = gf2p8affineqb(x_Shadow, A_shadow, 0)
4792+
// | gf2p8affineqb(x, A_shadow, 0)
4793+
// | gf2p8affineqb(x_Shadow, A, 0)
4794+
// | set1_epi8(b_Shadow)
4795+
//
4796+
// This approximation has false negatives: if an intermediate dot-product
4797+
// contains an even number of 1's, the parity is 0.
4798+
// It has no false positives.
4799+
void handleAVXGF2P8Affine(IntrinsicInst &I) {
4800+
IRBuilder<> IRB(&I);
4801+
4802+
assert(I.arg_size() == 3);
4803+
Value *A = I.getOperand(0);
4804+
Value *X = I.getOperand(1);
4805+
Value *B = I.getOperand(2);
4806+
4807+
assert(isFixedIntVector(A));
4808+
assert(cast<VectorType>(A->getType())
4809+
->getElementType()
4810+
->getScalarSizeInBits() == 8);
4811+
4812+
assert(A->getType() == X->getType());
4813+
4814+
assert(B->getType()->isIntegerTy());
4815+
assert(B->getType()->getScalarSizeInBits() == 8);
4816+
4817+
assert(I.getType() == A->getType());
4818+
4819+
Value *AShadow = getShadow(A);
4820+
Value *XShadow = getShadow(X);
4821+
Value *BZeroShadow = getCleanShadow(B);
4822+
4823+
CallInst *AShadowXShadow = IRB.CreateIntrinsic(
4824+
I.getType(), I.getIntrinsicID(), {XShadow, AShadow, BZeroShadow});
4825+
CallInst *AShadowX = IRB.CreateIntrinsic(I.getType(), I.getIntrinsicID(),
4826+
{X, AShadow, BZeroShadow});
4827+
CallInst *XShadowA = IRB.CreateIntrinsic(I.getType(), I.getIntrinsicID(),
4828+
{XShadow, A, BZeroShadow});
4829+
4830+
unsigned NumElements = cast<FixedVectorType>(I.getType())->getNumElements();
4831+
Value *BShadow = getShadow(B);
4832+
Value *BBroadcastShadow = getCleanShadow(AShadow);
4833+
// There is no LLVM IR intrinsic for _mm512_set1_epi8.
4834+
// This loop generates a lot of LLVM IR, which we expect that CodeGen will
4835+
// lower appropriately (e.g., VPBROADCASTB).
4836+
// Besides, b is often a constant, in which case it is fully initialized.
4837+
for (unsigned i = 0; i < NumElements; i++)
4838+
BBroadcastShadow = IRB.CreateInsertElement(BBroadcastShadow, BShadow, i);
4839+
4840+
setShadow(&I, IRB.CreateOr(
4841+
{AShadowXShadow, AShadowX, XShadowA, BBroadcastShadow}));
4842+
setOriginForNaryOp(I);
4843+
}
4844+
47724845
// Handle Arm NEON vector load intrinsics (vld*).
47734846
//
47744847
// The WithLane instructions (ld[234]lane) are similar to:
@@ -5604,6 +5677,13 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
56045677
break;
56055678
}
56065679

5680+
// AVX Galois Field New Instructions
5681+
case Intrinsic::x86_vgf2p8affineqb_128:
5682+
case Intrinsic::x86_vgf2p8affineqb_256:
5683+
case Intrinsic::x86_vgf2p8affineqb_512:
5684+
handleAVXGF2P8Affine(I);
5685+
break;
5686+
56075687
case Intrinsic::fshl:
56085688
case Intrinsic::fshr:
56095689
handleFunnelShift(I);

0 commit comments

Comments
 (0)