diff --git a/offload/unittests/CMakeLists.txt b/offload/unittests/CMakeLists.txt index 24826a1dcb756..117c88b742f8b 100644 --- a/offload/unittests/CMakeLists.txt +++ b/offload/unittests/CMakeLists.txt @@ -38,7 +38,7 @@ function(add_offload_test_device_code test_filename test_name) set(output_file "${CMAKE_CURRENT_BINARY_DIR}/${test_name}.nvptx64.bin") add_custom_command( OUTPUT ${output_file} - COMMAND ${CMAKE_C_COMPILER} + COMMAND ${CMAKE_CXX_COMPILER} --target=nvptx64-nvidia-cuda -march=${nvptx_arch} -nogpulib --cuda-path=${cuda_path} -flto ${ARGN} ${SRC_PATH} -o ${output_file} @@ -62,7 +62,7 @@ function(add_offload_test_device_code test_filename test_name) set(output_file "${CMAKE_CURRENT_BINARY_DIR}/${test_name}.amdgpu.bin") add_custom_command( OUTPUT ${output_file} - COMMAND ${CMAKE_C_COMPILER} + COMMAND ${CMAKE_CXX_COMPILER} --target=amdgcn-amd-amdhsa -mcpu=${amdgpu_arch} -nogpulib -flto ${ARGN} ${SRC_PATH} -o ${output_file} DEPENDS ${SRC_PATH} diff --git a/offload/unittests/Conformance/device_code/CMakeLists.txt b/offload/unittests/Conformance/device_code/CMakeLists.txt index 18f54b8dc5252..9cbd11096292c 100644 --- a/offload/unittests/Conformance/device_code/CMakeLists.txt +++ b/offload/unittests/Conformance/device_code/CMakeLists.txt @@ -1,4 +1,4 @@ -add_offload_test_device_code(LLVMLibm.c llvm-libm -stdlib -fno-builtin) +add_offload_test_device_code(LLVMLibm.cpp llvm-libm -stdlib -fno-builtin) add_custom_target(conformance_device_binaries DEPENDS llvm-libm.bin) set(OFFLOAD_CONFORMANCE_DEVICE_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR} PARENT_SCOPE) diff --git a/offload/unittests/Conformance/device_code/LLVMLibm.c b/offload/unittests/Conformance/device_code/LLVMLibm.cpp similarity index 97% rename from offload/unittests/Conformance/device_code/LLVMLibm.c rename to offload/unittests/Conformance/device_code/LLVMLibm.cpp index fe5196a539455..2c3d9bc5bf5cf 100644 --- a/offload/unittests/Conformance/device_code/LLVMLibm.c +++ b/offload/unittests/Conformance/device_code/LLVMLibm.cpp @@ -19,6 +19,8 @@ typedef _Float16 float16; +extern "C" { + __gpu_kernel void hypotf16Kernel(const float16 *X, float16 *Y, float16 *Out, size_t NumElements) { uint32_t Index = @@ -35,3 +37,4 @@ __gpu_kernel void logfKernel(const float *X, float *Out, size_t NumElements) { if (Index < NumElements) Out[Index] = logf(X[Index]); } +} // extern "C" diff --git a/offload/unittests/OffloadAPI/device_code/CMakeLists.txt b/offload/unittests/OffloadAPI/device_code/CMakeLists.txt index 0e4695ee9969f..50e430597e646 100644 --- a/offload/unittests/OffloadAPI/device_code/CMakeLists.txt +++ b/offload/unittests/OffloadAPI/device_code/CMakeLists.txt @@ -1,14 +1,14 @@ -add_offload_test_device_code(foo.c foo) -add_offload_test_device_code(bar.c bar) +add_offload_test_device_code(foo.cpp foo) +add_offload_test_device_code(bar.cpp bar) # Compile with optimizations to eliminate AMDGPU implicit arguments. -add_offload_test_device_code(noargs.c noargs -O3) -add_offload_test_device_code(localmem.c localmem) -add_offload_test_device_code(localmem_reduction.c localmem_reduction) -add_offload_test_device_code(localmem_static.c localmem_static) -add_offload_test_device_code(global.c global) -add_offload_test_device_code(global_ctor.c global_ctor) -add_offload_test_device_code(global_dtor.c global_dtor) -add_offload_test_device_code(sequence.c sequence) +add_offload_test_device_code(noargs.cpp noargs -O3) +add_offload_test_device_code(localmem.cpp localmem) +add_offload_test_device_code(localmem_reduction.cpp localmem_reduction) +add_offload_test_device_code(localmem_static.cpp localmem_static) +add_offload_test_device_code(global.cpp global) +add_offload_test_device_code(global_ctor.cpp global_ctor) +add_offload_test_device_code(global_dtor.cpp global_dtor) +add_offload_test_device_code(sequence.cpp sequence) add_custom_target(offload_device_binaries DEPENDS foo.bin diff --git a/offload/unittests/OffloadAPI/device_code/bar.c b/offload/unittests/OffloadAPI/device_code/bar.cpp similarity index 63% rename from offload/unittests/OffloadAPI/device_code/bar.c rename to offload/unittests/OffloadAPI/device_code/bar.cpp index 786aa2f5d61e7..b5191671f293f 100644 --- a/offload/unittests/OffloadAPI/device_code/bar.c +++ b/offload/unittests/OffloadAPI/device_code/bar.cpp @@ -1,5 +1,5 @@ #include -__gpu_kernel void foo(int *out) { +extern "C" __gpu_kernel void foo(int *out) { out[__gpu_thread_id(0)] = __gpu_thread_id(0) + 1; } diff --git a/offload/unittests/OffloadAPI/device_code/foo.c b/offload/unittests/OffloadAPI/device_code/foo.cpp similarity index 65% rename from offload/unittests/OffloadAPI/device_code/foo.c rename to offload/unittests/OffloadAPI/device_code/foo.cpp index 83cdc53cddd8d..cdc20015fc3e2 100644 --- a/offload/unittests/OffloadAPI/device_code/foo.c +++ b/offload/unittests/OffloadAPI/device_code/foo.cpp @@ -1,6 +1,6 @@ #include #include -__gpu_kernel void foo(uint32_t *out) { +extern "C" __gpu_kernel void foo(uint32_t *out) { out[__gpu_thread_id(0)] = __gpu_thread_id(0); } diff --git a/offload/unittests/OffloadAPI/device_code/global.c b/offload/unittests/OffloadAPI/device_code/global.cpp similarity index 92% rename from offload/unittests/OffloadAPI/device_code/global.c rename to offload/unittests/OffloadAPI/device_code/global.cpp index 9f27f9424324f..dada16c87766c 100644 --- a/offload/unittests/OffloadAPI/device_code/global.c +++ b/offload/unittests/OffloadAPI/device_code/global.cpp @@ -1,6 +1,8 @@ #include #include +extern "C" { + [[gnu::visibility("default")]] uint32_t global[64]; @@ -13,3 +15,4 @@ __gpu_kernel void read(uint32_t *out) { out[__gpu_thread_id(0) + (__gpu_num_threads(0) * __gpu_block_id(0))] = global[__gpu_thread_id(0)]; } +} // extern "C" diff --git a/offload/unittests/OffloadAPI/device_code/global_ctor.c b/offload/unittests/OffloadAPI/device_code/global_ctor.cpp similarity index 95% rename from offload/unittests/OffloadAPI/device_code/global_ctor.c rename to offload/unittests/OffloadAPI/device_code/global_ctor.cpp index 27e2d71d7566e..a14f1d59bf950 100644 --- a/offload/unittests/OffloadAPI/device_code/global_ctor.c +++ b/offload/unittests/OffloadAPI/device_code/global_ctor.cpp @@ -1,6 +1,8 @@ #include #include +extern "C" { + uint32_t global[64]; [[gnu::constructor(202)]] void ctorc() { @@ -23,3 +25,4 @@ __gpu_kernel void global_ctor(uint32_t *out) { out[__gpu_thread_id(0) + (__gpu_num_threads(0) * __gpu_block_id(0))] = global[__gpu_thread_id(0)]; } +} // extern "C" diff --git a/offload/unittests/OffloadAPI/device_code/global_dtor.c b/offload/unittests/OffloadAPI/device_code/global_dtor.cpp similarity index 87% rename from offload/unittests/OffloadAPI/device_code/global_dtor.c rename to offload/unittests/OffloadAPI/device_code/global_dtor.cpp index cadcc19cc296b..6b1f941342b40 100644 --- a/offload/unittests/OffloadAPI/device_code/global_dtor.c +++ b/offload/unittests/OffloadAPI/device_code/global_dtor.cpp @@ -1,6 +1,8 @@ #include #include +extern "C" { + uint32_t global[64]; [[gnu::destructor]] void dtor() { @@ -11,3 +13,4 @@ uint32_t global[64]; __gpu_kernel void global_dtor() { // no-op } +} // extern "C" diff --git a/offload/unittests/OffloadAPI/device_code/localmem.c b/offload/unittests/OffloadAPI/device_code/localmem.cpp similarity index 84% rename from offload/unittests/OffloadAPI/device_code/localmem.c rename to offload/unittests/OffloadAPI/device_code/localmem.cpp index d70847900bc43..9542e2cb1d648 100644 --- a/offload/unittests/OffloadAPI/device_code/localmem.c +++ b/offload/unittests/OffloadAPI/device_code/localmem.cpp @@ -3,7 +3,7 @@ extern __gpu_local uint32_t shared_mem[]; -__gpu_kernel void localmem(uint32_t *out) { +extern "C" __gpu_kernel void localmem(uint32_t *out) { shared_mem[__gpu_thread_id(0)] = __gpu_thread_id(0); shared_mem[__gpu_thread_id(0)] *= 2; out[__gpu_thread_id(0) + (__gpu_num_threads(0) * __gpu_block_id(0))] = diff --git a/offload/unittests/OffloadAPI/device_code/localmem_reduction.c b/offload/unittests/OffloadAPI/device_code/localmem_reduction.cpp similarity index 83% rename from offload/unittests/OffloadAPI/device_code/localmem_reduction.c rename to offload/unittests/OffloadAPI/device_code/localmem_reduction.cpp index 8a9a46cfb6a11..2c0a3e80b16e7 100644 --- a/offload/unittests/OffloadAPI/device_code/localmem_reduction.c +++ b/offload/unittests/OffloadAPI/device_code/localmem_reduction.cpp @@ -3,7 +3,7 @@ extern __gpu_local uint32_t shared_mem[]; -__gpu_kernel void localmem_reduction(uint32_t *out) { +extern "C" __gpu_kernel void localmem_reduction(uint32_t *out) { shared_mem[__gpu_thread_id(0)] = 2; __gpu_sync_threads(); diff --git a/offload/unittests/OffloadAPI/device_code/localmem_static.c b/offload/unittests/OffloadAPI/device_code/localmem_static.cpp similarity index 85% rename from offload/unittests/OffloadAPI/device_code/localmem_static.c rename to offload/unittests/OffloadAPI/device_code/localmem_static.cpp index 928b48422a0d6..a8dd95473742c 100644 --- a/offload/unittests/OffloadAPI/device_code/localmem_static.c +++ b/offload/unittests/OffloadAPI/device_code/localmem_static.cpp @@ -4,7 +4,7 @@ [[clang::loader_uninitialized]] __gpu_local uint32_t shared_mem[64]; -__gpu_kernel void localmem_static(uint32_t *out) { +extern "C" __gpu_kernel void localmem_static(uint32_t *out) { shared_mem[__gpu_thread_id(0)] = 2; __gpu_sync_threads(); diff --git a/offload/unittests/OffloadAPI/device_code/noargs.c b/offload/unittests/OffloadAPI/device_code/noargs.c deleted file mode 100644 index 36e609aa26a09..0000000000000 --- a/offload/unittests/OffloadAPI/device_code/noargs.c +++ /dev/null @@ -1,3 +0,0 @@ -#include - -__gpu_kernel void noargs() { (void)0; } diff --git a/offload/unittests/OffloadAPI/device_code/noargs.cpp b/offload/unittests/OffloadAPI/device_code/noargs.cpp new file mode 100644 index 0000000000000..58f989c714fed --- /dev/null +++ b/offload/unittests/OffloadAPI/device_code/noargs.cpp @@ -0,0 +1,3 @@ +#include + +extern "C" __gpu_kernel void noargs() { (void)0; } diff --git a/offload/unittests/OffloadAPI/device_code/sequence.c b/offload/unittests/OffloadAPI/device_code/sequence.cpp similarity index 71% rename from offload/unittests/OffloadAPI/device_code/sequence.c rename to offload/unittests/OffloadAPI/device_code/sequence.cpp index 7662f2d817496..07f92944346f5 100644 --- a/offload/unittests/OffloadAPI/device_code/sequence.c +++ b/offload/unittests/OffloadAPI/device_code/sequence.cpp @@ -1,7 +1,7 @@ #include #include -__gpu_kernel void sequence(uint32_t idx, uint32_t *inout) { +extern "C" __gpu_kernel void sequence(uint32_t idx, uint32_t *inout) { if (idx == 0) inout[idx] = 0; else if (idx == 1)