Skip to content

[Offload][Conformance] Add tests for single-precision math functions #152013

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions offload/unittests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ if (NOT TARGET llvm_gtest)
return ()
endif ()

set(OFFLOAD_UNITTESTS_DIR ${CMAKE_CURRENT_SOURCE_DIR})

function(add_offload_test_device_code test_filename test_name)
set(SRC_PATH ${CMAKE_CURRENT_SOURCE_DIR}/${test_filename})
set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)
Expand All @@ -39,6 +41,7 @@ function(add_offload_test_device_code test_filename test_name)
add_custom_command(
OUTPUT ${output_file}
COMMAND ${CMAKE_CXX_COMPILER}
-I${OFFLOAD_UNITTESTS_DIR}
--target=nvptx64-nvidia-cuda -march=${nvptx_arch}
-nogpulib --cuda-path=${cuda_path} -flto ${ARGN}
${SRC_PATH} -o ${output_file}
Expand All @@ -63,6 +66,7 @@ function(add_offload_test_device_code test_filename test_name)
add_custom_command(
OUTPUT ${output_file}
COMMAND ${CMAKE_CXX_COMPILER}
-I${OFFLOAD_UNITTESTS_DIR}
--target=amdgcn-amd-amdhsa -mcpu=${amdgpu_arch}
-nogpulib -flto ${ARGN} ${SRC_PATH} -o ${output_file}
DEPENDS ${SRC_PATH}
Expand Down
2 changes: 1 addition & 1 deletion offload/unittests/Conformance/device_code/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
add_offload_test_device_code(LLVMLibm.cpp llvm-libm -stdlib -fno-builtin)
add_offload_test_device_code(LLVMLibm.cpp llvm-libm -O3 -stdlib -fno-builtin)

add_custom_target(conformance_device_binaries DEPENDS llvm-libm.bin)
set(OFFLOAD_CONFORMANCE_DEVICE_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR} PARENT_SCOPE)
37 changes: 37 additions & 0 deletions offload/unittests/Conformance/device_code/Common.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file contains common utilities for defining device kernel wrappers to
/// math functions.
///
//===----------------------------------------------------------------------===//

#ifndef CONFORMANCE_DEVICE_CODE_COMMON_HPP
#define CONFORMANCE_DEVICE_CODE_COMMON_HPP

#include <gpuintrin.h>
#include <stddef.h>
#include <stdint.h>

namespace common {

/// Short alias for the compiler-provided half-precision type.
using float16 = _Float16;

/// Shared body of the element-wise math kernels.
///
/// Computes the calling thread's global index along the x dimension and, if
/// that index is within bounds, stores `Func(Ins[Index]...)` to `Out[Index]`.
/// Threads whose index is at or beyond \p NumElements do nothing.
///
/// \tparam Func     The function applied to one element from each input array.
/// \tparam OutType  Element type of the output array.
/// \tparam InTypes  Element types of the input arrays, one per argument of
///                  \p Func.
/// \param NumElements Number of elements in \p Out and in each array of
///                    \p Ins.
/// \param Out Destination array.
/// \param Ins Input arrays, passed to \p Func in order.
template <auto Func, typename OutType, typename... InTypes>
void runKernelBody(size_t NumElements, OutType *Out, const InTypes *...Ins) {
  // Widen to size_t *before* multiplying: the intrinsics yield 32-bit values,
  // so a 32-bit product would wrap for launches spanning more than 2^32
  // threads and silently alias low indices when compared with NumElements.
  const size_t Index =
      static_cast<size_t>(__gpu_num_threads_x()) * __gpu_block_id_x() +
      __gpu_thread_id_x();

  if (Index < NumElements)
    Out[Index] = Func(Ins[Index]...);
}
} // namespace common

#endif // CONFORMANCE_DEVICE_CODE_COMMON_HPP
168 changes: 156 additions & 12 deletions offload/unittests/Conformance/device_code/LLVMLibm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,29 +12,173 @@
///
//===----------------------------------------------------------------------===//

#include "Conformance/device_code/Common.hpp"

#include <gpuintrin.h>
#include <math.h>
#include <stddef.h>
#include <stdint.h>

typedef _Float16 float16;
using namespace common;

//===----------------------------------------------------------------------===//
// Helpers
//===----------------------------------------------------------------------===//

/// Adapts sincosf to a single-result function: calls sincosf on \p X and
/// returns only the sine output, discarding the cosine.
static inline float sincosfSin(float X) {
  float Sine = 0.0f;
  float Cosine = 0.0f; // Required by sincosf's signature; value is unused.
  sincosf(X, &Sine, &Cosine);
  return Sine;
}

/// Adapts sincosf to a single-result function: calls sincosf on \p X and
/// returns only the cosine output, discarding the sine.
static inline float sincosfCos(float X) {
  float Sine = 0.0f; // Required by sincosf's signature; value is unused.
  float Cosine = 0.0f;
  sincosf(X, &Sine, &Cosine);
  return Cosine;
}

//===----------------------------------------------------------------------===//
// Kernels
//===----------------------------------------------------------------------===//

extern "C" {

/// Kernel wrapper for acosf. Forwards to runKernelBody<acosf> with X as the
/// single input array, Out as the destination and NumElements as the bound.
__gpu_kernel void acosfKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<acosf>(NumElements, Out, X);
}

/// Kernel wrapper for acoshf. Forwards to runKernelBody<acoshf> with X as the
/// single input array, Out as the destination and NumElements as the bound.
__gpu_kernel void acoshfKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<acoshf>(NumElements, Out, X);
}

/// Kernel wrapper for asinf. Forwards to runKernelBody<asinf> with X as the
/// single input array, Out as the destination and NumElements as the bound.
__gpu_kernel void asinfKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<asinf>(NumElements, Out, X);
}

/// Kernel wrapper for asinhf. Forwards to runKernelBody<asinhf> with X as the
/// single input array, Out as the destination and NumElements as the bound.
__gpu_kernel void asinhfKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<asinhf>(NumElements, Out, X);
}

/// Kernel wrapper for atanf. Forwards to runKernelBody<atanf> with X as the
/// single input array, Out as the destination and NumElements as the bound.
__gpu_kernel void atanfKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<atanf>(NumElements, Out, X);
}

/// Kernel wrapper for atanhf. Forwards to runKernelBody<atanhf> with X as the
/// single input array, Out as the destination and NumElements as the bound.
__gpu_kernel void atanhfKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<atanhf>(NumElements, Out, X);
}

/// Kernel wrapper for cbrtf. Forwards to runKernelBody<cbrtf> with X as the
/// single input array, Out as the destination and NumElements as the bound.
__gpu_kernel void cbrtfKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<cbrtf>(NumElements, Out, X);
}

/// Kernel wrapper for cosf. Forwards to runKernelBody<cosf> with X as the
/// single input array, Out as the destination and NumElements as the bound.
__gpu_kernel void cosfKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<cosf>(NumElements, Out, X);
}

/// Kernel wrapper for coshf. Forwards to runKernelBody<coshf> with X as the
/// single input array, Out as the destination and NumElements as the bound.
__gpu_kernel void coshfKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<coshf>(NumElements, Out, X);
}

/// Kernel wrapper for cospif. Forwards to runKernelBody<cospif> with X as the
/// single input array, Out as the destination and NumElements as the bound.
__gpu_kernel void cospifKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<cospif>(NumElements, Out, X);
}

/// Kernel wrapper for erff. Forwards to runKernelBody<erff> with X as the
/// single input array, Out as the destination and NumElements as the bound.
__gpu_kernel void erffKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<erff>(NumElements, Out, X);
}

/// Kernel wrapper for expf. Forwards to runKernelBody<expf> with X as the
/// single input array, Out as the destination and NumElements as the bound.
__gpu_kernel void expfKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<expf>(NumElements, Out, X);
}

/// Kernel wrapper for exp10f. Forwards to runKernelBody<exp10f> with X as the
/// single input array, Out as the destination and NumElements as the bound.
__gpu_kernel void exp10fKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<exp10f>(NumElements, Out, X);
}

/// Kernel wrapper for exp2f. Forwards to runKernelBody<exp2f> with X as the
/// single input array, Out as the destination and NumElements as the bound.
__gpu_kernel void exp2fKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<exp2f>(NumElements, Out, X);
}

/// Kernel wrapper for expm1f. Forwards to runKernelBody<expm1f> with X as the
/// single input array, Out as the destination and NumElements as the bound.
__gpu_kernel void expm1fKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<expm1f>(NumElements, Out, X);
}

__gpu_kernel void hypotf16Kernel(const float16 *X, float16 *Y, float16 *Out,
size_t NumElements) {
uint32_t Index =
__gpu_num_threads_x() * __gpu_block_id_x() + __gpu_thread_id_x();
size_t NumElements) noexcept {
runKernelBody<hypotf16>(NumElements, Out, X, Y);
}

/// Kernel wrapper for logf. Forwards to runKernelBody<logf> with X as the
/// single input array, Out as the destination and NumElements as the bound.
__gpu_kernel void logfKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<logf>(NumElements, Out, X);
}

/// Kernel wrapper for log10f. Forwards to runKernelBody<log10f> with X as the
/// single input array, Out as the destination and NumElements as the bound.
__gpu_kernel void log10fKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<log10f>(NumElements, Out, X);
}

if (Index < NumElements)
Out[Index] = hypotf16(X[Index], Y[Index]);
/// Kernel wrapper for log1pf. Forwards to runKernelBody<log1pf> with X as the
/// single input array, Out as the destination and NumElements as the bound.
__gpu_kernel void log1pfKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<log1pf>(NumElements, Out, X);
}

__gpu_kernel void logfKernel(const float *X, float *Out, size_t NumElements) {
uint32_t Index =
__gpu_num_threads_x() * __gpu_block_id_x() + __gpu_thread_id_x();
/// Kernel wrapper for log2f. Forwards to runKernelBody<log2f> with X as the
/// single input array, Out as the destination and NumElements as the bound.
__gpu_kernel void log2fKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<log2f>(NumElements, Out, X);
}

/// Kernel wrapper for sinf. Forwards to runKernelBody<sinf> with X as the
/// single input array, Out as the destination and NumElements as the bound.
__gpu_kernel void sinfKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<sinf>(NumElements, Out, X);
}

/// Kernel wrapper for the sine output of sincosf. Forwards to
/// runKernelBody<sincosfSin> with X as the single input array, Out as the
/// destination and NumElements as the bound.
__gpu_kernel void sincosfSinKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<sincosfSin>(NumElements, Out, X);
}

/// Kernel wrapper for the cosine output of sincosf. Forwards to
/// runKernelBody<sincosfCos> with X as the single input array, Out as the
/// destination and NumElements as the bound.
__gpu_kernel void sincosfCosKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<sincosfCos>(NumElements, Out, X);
}

/// Kernel wrapper for sinhf. Forwards to runKernelBody<sinhf> with X as the
/// single input array, Out as the destination and NumElements as the bound.
__gpu_kernel void sinhfKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<sinhf>(NumElements, Out, X);
}

/// Kernel wrapper for sinpif. Forwards to runKernelBody<sinpif> with X as the
/// single input array, Out as the destination and NumElements as the bound.
__gpu_kernel void sinpifKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<sinpif>(NumElements, Out, X);
}

/// Kernel wrapper for tanf. Forwards to runKernelBody<tanf> with X as the
/// single input array, Out as the destination and NumElements as the bound.
__gpu_kernel void tanfKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<tanf>(NumElements, Out, X);
}

/// Kernel wrapper for tanhf. Forwards to runKernelBody<tanhf> with X as the
/// single input array, Out as the destination and NumElements as the bound.
__gpu_kernel void tanhfKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<tanhf>(NumElements, Out, X);
}

if (Index < NumElements)
Out[Index] = logf(X[Index]);
/// Kernel wrapper for tanpif. Forwards to runKernelBody<tanpif> with X as the
/// single input array, Out as the destination and NumElements as the bound.
__gpu_kernel void tanpifKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<tanpif>(NumElements, Out, X);
}
} // extern "C"
53 changes: 53 additions & 0 deletions offload/unittests/Conformance/tests/AcosfTest.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file contains the conformance test of the acosf function.
///
//===----------------------------------------------------------------------===//

#include "mathtest/CommandLineExtras.hpp"
#include "mathtest/ExhaustiveGenerator.hpp"
#include "mathtest/IndexedRange.hpp"
#include "mathtest/TestConfig.hpp"
#include "mathtest/TestRunner.hpp"

#include "llvm/ADT/StringRef.h"

#include <cstdlib>
#include <math.h>

namespace mathtest {

/// Conformance-test configuration for acosf, supplied as an explicit
/// specialization of FunctionConfig keyed on the function itself.
template <> struct FunctionConfig<acosf> {
// Name of the function under test.
static constexpr llvm::StringRef Name = "acosf";
// Name of the device kernel that wraps acosf.
// NOTE(review): presumably used to look the kernel up in the device binary
// - confirm against TestRunner.
static constexpr llvm::StringRef KernelName = "acosfKernel";

// ULP tolerance for acosf results.
// Source: The Khronos Group, The OpenCL C Specification v3.0.19, Sec. 7.4,
// Table 65, Khronos Registry [July 10, 2025].
static constexpr uint64_t UlpTolerance = 4;
};
} // namespace mathtest

int main(int argc, const char **argv) {
llvm::cl::ParseCommandLineOptions(argc, argv,
"Conformance test of the acosf function");

using namespace mathtest;

IndexedRange<float> Range;
ExhaustiveGenerator<float> Generator(Range);

const auto Configs = cl::getTestConfigs();
const llvm::StringRef DeviceBinaryDir = DEVICE_BINARY_DIR;
const bool IsVerbose = cl::IsVerbose;

bool Passed = runTests<acosf>(Generator, Configs, DeviceBinaryDir, IsVerbose);

return Passed ? EXIT_SUCCESS : EXIT_FAILURE;
}
57 changes: 57 additions & 0 deletions offload/unittests/Conformance/tests/AcoshfTest.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file contains the conformance test of the acoshf function.
///
//===----------------------------------------------------------------------===//

#include "mathtest/CommandLineExtras.hpp"
#include "mathtest/ExhaustiveGenerator.hpp"
#include "mathtest/IndexedRange.hpp"
#include "mathtest/TestConfig.hpp"
#include "mathtest/TestRunner.hpp"

#include "llvm/ADT/StringRef.h"

#include <cstdlib>
#include <limits>
#include <math.h>

namespace mathtest {

/// Conformance-test configuration for acoshf, supplied as an explicit
/// specialization of FunctionConfig keyed on the function itself.
template <> struct FunctionConfig<acoshf> {
// Name of the function under test.
static constexpr llvm::StringRef Name = "acoshf";
// Name of the device kernel that wraps acoshf.
// NOTE(review): presumably used to look the kernel up in the device binary
// - confirm against TestRunner.
static constexpr llvm::StringRef KernelName = "acoshfKernel";

// ULP tolerance for acoshf results.
// Source: The Khronos Group, The OpenCL C Specification v3.0.19, Sec. 7.4,
// Table 65, Khronos Registry [July 10, 2025].
static constexpr uint64_t UlpTolerance = 4;
};
} // namespace mathtest

int main(int argc, const char **argv) {
llvm::cl::ParseCommandLineOptions(argc, argv,
"Conformance test of the acoshf function");

using namespace mathtest;

IndexedRange<float> Range(/*Begin=*/1.0f,
/*End=*/std::numeric_limits<float>::infinity(),
/*Inclusive=*/true);
ExhaustiveGenerator<float> Generator(Range);

const auto Configs = cl::getTestConfigs();
const llvm::StringRef DeviceBinaryDir = DEVICE_BINARY_DIR;
const bool IsVerbose = cl::IsVerbose;

bool Passed =
runTests<acoshf>(Generator, Configs, DeviceBinaryDir, IsVerbose);

return Passed ? EXIT_SUCCESS : EXIT_FAILURE;
}
Loading
Loading