Skip to content

Commit fef9db1

Browse files
Build device code as C++
1 parent 5f2a8cd commit fef9db1

File tree

15 files changed

+34
-22
lines changed

15 files changed

+34
-22
lines changed

offload/unittests/CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ function(add_offload_test_device_code test_filename test_name)
3838
set(output_file "${CMAKE_CURRENT_BINARY_DIR}/${test_name}.nvptx64.bin")
3939
add_custom_command(
4040
OUTPUT ${output_file}
41-
COMMAND ${CMAKE_C_COMPILER}
41+
COMMAND ${CMAKE_CXX_COMPILER}
4242
--target=nvptx64-nvidia-cuda -march=${nvptx_arch}
4343
-nogpulib --cuda-path=${CUDA_ROOT} -flto ${ARGN}
4444
${SRC_PATH} -o ${output_file}
@@ -62,7 +62,7 @@ function(add_offload_test_device_code test_filename test_name)
6262
set(output_file "${CMAKE_CURRENT_BINARY_DIR}/${test_name}.amdgpu.bin")
6363
add_custom_command(
6464
OUTPUT ${output_file}
65-
COMMAND ${CMAKE_C_COMPILER}
65+
COMMAND ${CMAKE_CXX_COMPILER}
6666
--target=amdgcn-amd-amdhsa -mcpu=${amdgpu_arch}
6767
-nogpulib -flto ${ARGN} ${SRC_PATH} -o ${output_file}
6868
DEPENDS ${SRC_PATH}
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
add_offload_test_device_code(LLVMLibm.c llvm-libm -stdlib -fno-builtin)
1+
add_offload_test_device_code(LLVMLibm.cpp llvm-libm -stdlib -fno-builtin)
22

33
add_custom_target(conformance_device_binaries DEPENDS llvm-libm.bin)
44
set(OFFLOAD_CONFORMANCE_DEVICE_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR} PARENT_SCOPE)

offload/unittests/Conformance/device_code/LLVMLibm.c renamed to offload/unittests/Conformance/device_code/LLVMLibm.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919

2020
typedef _Float16 float16;
2121

22+
extern "C" {
23+
2224
__gpu_kernel void hypotf16Kernel(const float16 *X, float16 *Y, float16 *Out,
2325
size_t NumElements) {
2426
uint32_t Index =
@@ -35,3 +37,4 @@ __gpu_kernel void logfKernel(const float *X, float *Out, size_t NumElements) {
3537
if (Index < NumElements)
3638
Out[Index] = logf(X[Index]);
3739
}
40+
} // extern "C"

offload/unittests/OffloadAPI/device_code/CMakeLists.txt

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
1-
add_offload_test_device_code(foo.c foo)
2-
add_offload_test_device_code(bar.c bar)
1+
add_offload_test_device_code(foo.cpp foo)
2+
add_offload_test_device_code(bar.cpp bar)
33
# Compile with optimizations to eliminate AMDGPU implicit arguments.
4-
add_offload_test_device_code(noargs.c noargs -O3)
5-
add_offload_test_device_code(localmem.c localmem)
6-
add_offload_test_device_code(localmem_reduction.c localmem_reduction)
7-
add_offload_test_device_code(localmem_static.c localmem_static)
8-
add_offload_test_device_code(global.c global)
9-
add_offload_test_device_code(global_ctor.c global_ctor)
10-
add_offload_test_device_code(global_dtor.c global_dtor)
11-
add_offload_test_device_code(sequence.c sequence)
4+
add_offload_test_device_code(noargs.cpp noargs -O3)
5+
add_offload_test_device_code(localmem.cpp localmem)
6+
add_offload_test_device_code(localmem_reduction.cpp localmem_reduction)
7+
add_offload_test_device_code(localmem_static.cpp localmem_static)
8+
add_offload_test_device_code(global.cpp global)
9+
add_offload_test_device_code(global_ctor.cpp global_ctor)
10+
add_offload_test_device_code(global_dtor.cpp global_dtor)
11+
add_offload_test_device_code(sequence.cpp sequence)
1212

1313
add_custom_target(offload_device_binaries DEPENDS
1414
foo.bin
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#include <gpuintrin.h>
22

3-
__gpu_kernel void foo(int *out) {
3+
extern "C" __gpu_kernel void foo(int *out) {
44
out[__gpu_thread_id(0)] = __gpu_thread_id(0) + 1;
55
}
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#include <gpuintrin.h>
22
#include <stdint.h>
33

4-
__gpu_kernel void foo(uint32_t *out) {
4+
extern "C" __gpu_kernel void foo(uint32_t *out) {
55
out[__gpu_thread_id(0)] = __gpu_thread_id(0);
66
}

offload/unittests/OffloadAPI/device_code/global.c renamed to offload/unittests/OffloadAPI/device_code/global.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
#include <gpuintrin.h>
22
#include <stdint.h>
33

4+
extern "C" {
5+
46
[[gnu::visibility("default")]]
57
uint32_t global[64];
68

@@ -13,3 +15,4 @@ __gpu_kernel void read(uint32_t *out) {
1315
out[__gpu_thread_id(0) + (__gpu_num_threads(0) * __gpu_block_id(0))] =
1416
global[__gpu_thread_id(0)];
1517
}
18+
} // extern "C"

offload/unittests/OffloadAPI/device_code/global_ctor.c renamed to offload/unittests/OffloadAPI/device_code/global_ctor.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
#include <gpuintrin.h>
22
#include <stdint.h>
33

4+
extern "C" {
5+
46
uint32_t global[64];
57

68
[[gnu::constructor(202)]] void ctorc() {
@@ -23,3 +25,4 @@ __gpu_kernel void global_ctor(uint32_t *out) {
2325
out[__gpu_thread_id(0) + (__gpu_num_threads(0) * __gpu_block_id(0))] =
2426
global[__gpu_thread_id(0)];
2527
}
28+
} // extern "C"

offload/unittests/OffloadAPI/device_code/global_dtor.c renamed to offload/unittests/OffloadAPI/device_code/global_dtor.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
#include <gpuintrin.h>
22
#include <stdint.h>
33

4+
extern "C" {
5+
46
uint32_t global[64];
57

68
[[gnu::destructor]] void dtor() {
@@ -11,3 +13,4 @@ uint32_t global[64];
1113
__gpu_kernel void global_dtor() {
1214
// no-op
1315
}
16+
} // extern "C"

offload/unittests/OffloadAPI/device_code/localmem.c renamed to offload/unittests/OffloadAPI/device_code/localmem.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
extern __gpu_local uint32_t shared_mem[];
55

6-
__gpu_kernel void localmem(uint32_t *out) {
6+
extern "C" __gpu_kernel void localmem(uint32_t *out) {
77
shared_mem[__gpu_thread_id(0)] = __gpu_thread_id(0);
88
shared_mem[__gpu_thread_id(0)] *= 2;
99
out[__gpu_thread_id(0) + (__gpu_num_threads(0) * __gpu_block_id(0))] =

0 commit comments

Comments
 (0)